9 files changed, 805 insertions, 42 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index d948ca12acf0..5e3f3b75563a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o latency.o nsproxy.o
+	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 1a58a81fb09d..4f40d923af8e 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -411,7 +411,6 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		case AUDIT_FSGID:
 		case AUDIT_LOGINUID:
 		case AUDIT_PERS:
-		case AUDIT_ARCH:
 		case AUDIT_MSGTYPE:
 		case AUDIT_PPID:
 		case AUDIT_DEVMAJOR:
@@ -423,6 +422,14 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		case AUDIT_ARG2:
 		case AUDIT_ARG3:
 			break;
+		/* arch is only allowed to be = or != */
+		case AUDIT_ARCH:
+			if ((f->op != AUDIT_NOT_EQUAL) && (f->op != AUDIT_EQUAL)
+					&& (f->op != AUDIT_NEGATE) && (f->op)) {
+				err = -EINVAL;
+				goto exit_free;
+			}
+			break;
 		case AUDIT_PERM:
 			if (f->val & ~15)
 				goto exit_free;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 105147631753..42f2f1179711 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -278,8 +278,11 @@ static int audit_filter_rules(struct task_struct *tsk,
 			result = audit_comparator(tsk->pid, f->op, f->val);
 			break;
 		case AUDIT_PPID:
-			if (ctx)
+			if (ctx) {
+				if (!ctx->ppid)
+					ctx->ppid = sys_getppid();
 				result = audit_comparator(ctx->ppid, f->op, f->val);
+			}
 			break;
 		case AUDIT_UID:
 			result = audit_comparator(tsk->uid, f->op, f->val);
@@ -795,7 +798,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
 	/* tsk == current */
 	context->pid = tsk->pid;
-	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
+	if (!context->ppid)
+		context->ppid = sys_getppid();
 	context->uid = tsk->uid;
 	context->gid = tsk->gid;
 	context->euid = tsk->euid;
@@ -1137,6 +1141,7 @@ void audit_syscall_entry(int arch, int major,
 	context->ctime      = CURRENT_TIME;
 	context->in_syscall = 1;
 	context->auditable  = !!(state == AUDIT_RECORD_CONTEXT);
+	context->ppid       = 0;
 }
 
 /**
@@ -1352,7 +1357,13 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
 		}
 
 update_context:
-	idx = context->name_count++;
+	idx = context->name_count;
+	if (context->name_count == AUDIT_NAMES) {
+		printk(KERN_DEBUG "name_count maxed and losing %s\n",
+			found_name ?: "(null)");
+		return;
+	}
+	context->name_count++;
 #if AUDIT_DEBUG
 	context->ino_count++;
 #endif
@@ -1370,7 +1381,16 @@ update_context:
 	/* A parent was not found in audit_names, so copy the inode data for the
 	 * provided parent. */
 	if (!found_name) {
-		idx = context->name_count++;
+		idx = context->name_count;
+		if (context->name_count == AUDIT_NAMES) {
+			printk(KERN_DEBUG
+				"name_count maxed and losing parent inode data: dev=%02x:%02x, inode=%lu",
+				MAJOR(parent->i_sb->s_dev),
+				MINOR(parent->i_sb->s_dev),
+				parent->i_ino);
+			return;
+		}
+		context->name_count++;
 #if AUDIT_DEBUG
 		context->ino_count++;
 #endif
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 736cb0bd498f..4cf65f5c6a74 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -18,6 +18,69 @@
 #include "internals.h"
 
 /**
+ *	dynamic_irq_init - initialize a dynamically allocated irq
+ *	@irq:	irq number to initialize
+ */
+void dynamic_irq_init(unsigned int irq)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
+		WARN_ON(1);
+		return;
+	}
+
+	/* Ensure we don't have left over values from a previous use of this irq */
+	desc = irq_desc + irq;
+	spin_lock_irqsave(&desc->lock, flags);
+	desc->status = IRQ_DISABLED;
+	desc->chip = &no_irq_chip;
+	desc->handle_irq = handle_bad_irq;
+	desc->depth = 1;
+	desc->handler_data = NULL;
+	desc->chip_data = NULL;
+	desc->action = NULL;
+	desc->irq_count = 0;
+	desc->irqs_unhandled = 0;
+#ifdef CONFIG_SMP
+	desc->affinity = CPU_MASK_ALL;
+#endif
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/**
+ *	dynamic_irq_cleanup - cleanup a dynamically allocated irq
+ *	@irq:	irq number to initialize
+ */
+void dynamic_irq_cleanup(unsigned int irq)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
+		WARN_ON(1);
+		return;
+	}
+
+	desc = irq_desc + irq;
+	spin_lock_irqsave(&desc->lock, flags);
+	if (desc->action) {
+		spin_unlock_irqrestore(&desc->lock, flags);
+		printk(KERN_ERR "Destroying IRQ%d without calling free_irq\n",
+			irq);
+		WARN_ON(1);
+		return;
+	}
+	desc->handle_irq = handle_bad_irq;
+	desc->chip = &no_irq_chip;
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+
+/**
  *	set_irq_chip - set the irq chip for an irq
  *	@irq:	irq number
  *	@chip:	pointer to irq chip description structure
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index a57ebe9fa6f6..4baa3bbcd25a 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -7,17 +7,17 @@ void set_pending_irq(unsigned int irq, cpumask_t mask)
 	unsigned long flags;
 
 	spin_lock_irqsave(&desc->lock, flags);
-	desc->move_irq = 1;
+	desc->status |= IRQ_MOVE_PENDING;
 	irq_desc[irq].pending_mask = mask;
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 
-void move_native_irq(int irq)
+void move_masked_irq(int irq)
 {
 	struct irq_desc *desc = irq_desc + irq;
 	cpumask_t tmp;
 
-	if (likely(!desc->move_irq))
+	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
 		return;
 
 	/*
@@ -28,7 +28,7 @@ void move_native_irq(int irq)
 		return;
 	}
 
-	desc->move_irq = 0;
+	desc->status &= ~IRQ_MOVE_PENDING;
 
 	if (unlikely(cpus_empty(irq_desc[irq].pending_mask)))
 		return;
@@ -48,15 +48,29 @@ void move_native_irq(int irq)
 	 * when an active trigger is comming in. This could
 	 * cause some ioapics to mal-function.
 	 * Being paranoid i guess!
+	 *
+	 * For correct operation this depends on the caller
+	 * masking the irqs.
 	 */
 	if (likely(!cpus_empty(tmp))) {
-		if (likely(!(desc->status & IRQ_DISABLED)))
-			desc->chip->disable(irq);
-
 		desc->chip->set_affinity(irq,tmp);
-
-		if (likely(!(desc->status & IRQ_DISABLED)))
-			desc->chip->enable(irq);
 	}
 	cpus_clear(irq_desc[irq].pending_mask);
 }
+
+void move_native_irq(int irq)
+{
+	struct irq_desc *desc = irq_desc + irq;
+
+	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+		return;
+
+	if (likely(!(desc->status & IRQ_DISABLED)))
+		desc->chip->disable(irq);
+
+	move_masked_irq(irq);
+
+	if (likely(!(desc->status & IRQ_DISABLED)))
+		desc->chip->enable(irq);
+}
+
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 523e46483b99..26bb5ffe1ef1 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -71,9 +71,6 @@ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
 static int blimit = 10;
 static int qhimark = 10000;
 static int qlowmark = 100;
-#ifdef CONFIG_SMP
-static int rsinterval = 1000;
-#endif
 
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
@@ -86,8 +83,8 @@ static void force_quiescent_state(struct rcu_data *rdp,
 	int cpu;
 	cpumask_t cpumask;
 	set_need_resched();
-	if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
-		rdp->last_rs_qlen = rdp->qlen;
+	if (unlikely(!rcp->signaled)) {
+		rcp->signaled = 1;
 		/*
 		 * Don't send IPI to itself. With irqs disabled,
 		 * rdp->cpu is the current cpu.
@@ -301,6 +298,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 		smp_mb();
 		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
 
+		rcp->signaled = 0;
 	}
 }
 
@@ -628,9 +626,6 @@ void synchronize_rcu(void)
 module_param(blimit, int, 0);
 module_param(qhimark, int, 0);
 module_param(qlowmark, int, 0);
-#ifdef CONFIG_SMP
-module_param(rsinterval, int, 0);
-#endif
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
 EXPORT_SYMBOL_GPL(call_rcu);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 23446e91cded..e2bda18f6f42 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -15,9 +15,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
- * Copyright (C) IBM Corporation, 2005
+ * Copyright (C) IBM Corporation, 2005, 2006
  *
  * Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ *          Josh Triplett <josh@freedesktop.org>
  *
  * See also:  Documentation/RCU/torture.txt
  */
@@ -44,19 +45,25 @@
 #include <linux/delay.h>
 #include <linux/byteorder/swabb.h>
 #include <linux/stat.h>
+#include <linux/srcu.h>
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
+              "Josh Triplett <josh@freedesktop.org>");
 
 static int nreaders = -1;	/* # reader threads, defaults to 2*ncpus */
+static int nfakewriters = 4;	/* # fake writer threads */
 static int stat_interval;	/* Interval between stats, in seconds. */
 				/*  Defaults to "only at end of test". */
 static int verbose;		/* Print more debug info. */
 static int test_no_idle_hz;	/* Test RCU's support for tickless idle CPUs. */
 static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
-static char *torture_type = "rcu"; /* What to torture. */
+static char *torture_type = "rcu"; /* What RCU implementation to torture. */
 
 module_param(nreaders, int, 0);
 MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
+module_param(nfakewriters, int, 0);
+MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
 module_param(stat_interval, int, 0);
 MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
 module_param(verbose, bool, 0);
@@ -66,7 +73,7 @@ MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
 module_param(shuffle_interval, int, 0);
 MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
 module_param(torture_type, charp, 0);
-MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh)");
+MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
 
 #define TORTURE_FLAG "-torture:"
 #define PRINTK_STRING(s) \
@@ -80,6 +87,7 @@ static char printk_buf[4096];
 
 static int nrealreaders;
 static struct task_struct *writer_task;
+static struct task_struct **fakewriter_tasks;
 static struct task_struct **reader_tasks;
 static struct task_struct *stats_task;
 static struct task_struct *shuffler_task;
@@ -104,11 +112,12 @@ static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) =
 static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) =
 	{ 0 };
 static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
-atomic_t n_rcu_torture_alloc;
-atomic_t n_rcu_torture_alloc_fail;
-atomic_t n_rcu_torture_free;
-atomic_t n_rcu_torture_mberror;
-atomic_t n_rcu_torture_error;
+static atomic_t n_rcu_torture_alloc;
+static atomic_t n_rcu_torture_alloc_fail;
+static atomic_t n_rcu_torture_free;
+static atomic_t n_rcu_torture_mberror;
+static atomic_t n_rcu_torture_error;
+static struct list_head rcu_torture_removed;
 
 /*
  * Allocate an element from the rcu_tortures pool.
@@ -145,7 +154,7 @@ rcu_torture_free(struct rcu_torture *p)
 
 struct rcu_random_state {
 	unsigned long rrs_state;
-	unsigned long rrs_count;
+	long rrs_count;
 };
 
 #define RCU_RANDOM_MULT 39916801  /* prime */
@@ -158,7 +167,7 @@ struct rcu_random_state {
  * Crude but fast random-number generator.  Uses a linear congruential
  * generator, with occasional help from get_random_bytes().
  */
-static long
+static unsigned long
 rcu_random(struct rcu_random_state *rrsp)
 {
 	long refresh;
@@ -180,9 +189,11 @@ struct rcu_torture_ops {
 	void (*init)(void);
 	void (*cleanup)(void);
 	int (*readlock)(void);
+	void (*readdelay)(struct rcu_random_state *rrsp);
 	void (*readunlock)(int idx);
 	int (*completed)(void);
 	void (*deferredfree)(struct rcu_torture *p);
+	void (*sync)(void);
 	int (*stats)(char *page);
 	char *name;
 };
@@ -198,6 +209,18 @@ static int rcu_torture_read_lock(void) __acquires(RCU)
 	return 0;
 }
 
+static void rcu_read_delay(struct rcu_random_state *rrsp)
+{
+	long delay;
+	const long longdelay = 200;
+
+	/* We want there to be long-running readers, but not all the time. */
+
+	delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay);
+	if (!delay)
+		udelay(longdelay);
+}
+
 static void rcu_torture_read_unlock(int idx) __releases(RCU)
 {
 	rcu_read_unlock();
@@ -239,13 +262,54 @@ static struct rcu_torture_ops rcu_ops = {
 	.init = NULL,
 	.cleanup = NULL,
 	.readlock = rcu_torture_read_lock,
+	.readdelay = rcu_read_delay,
 	.readunlock = rcu_torture_read_unlock,
 	.completed = rcu_torture_completed,
 	.deferredfree = rcu_torture_deferred_free,
+	.sync = synchronize_rcu,
 	.stats = NULL,
 	.name = "rcu"
 };
 
+static void rcu_sync_torture_deferred_free(struct rcu_torture *p)
+{
+	int i;
+	struct rcu_torture *rp;
+	struct rcu_torture *rp1;
+
+	cur_ops->sync();
+	list_add(&p->rtort_free, &rcu_torture_removed);
+	list_for_each_entry_safe(rp, rp1, &rcu_torture_removed, rtort_free) {
+		i = rp->rtort_pipe_count;
+		if (i > RCU_TORTURE_PIPE_LEN)
+			i = RCU_TORTURE_PIPE_LEN;
+		atomic_inc(&rcu_torture_wcount[i]);
+		if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
+			rp->rtort_mbtest = 0;
+			list_del(&rp->rtort_free);
+			rcu_torture_free(rp);
+		}
+	}
+}
+
+static void rcu_sync_torture_init(void)
+{
+	INIT_LIST_HEAD(&rcu_torture_removed);
+}
+
+static struct rcu_torture_ops rcu_sync_ops = {
+	.init = rcu_sync_torture_init,
+	.cleanup = NULL,
+	.readlock = rcu_torture_read_lock,
+	.readdelay = rcu_read_delay,
+	.readunlock = rcu_torture_read_unlock,
+	.completed = rcu_torture_completed,
+	.deferredfree = rcu_sync_torture_deferred_free,
+	.sync = synchronize_rcu,
+	.stats = NULL,
+	.name = "rcu_sync"
+};
+
 /*
  * Definitions for rcu_bh torture testing.
  */
@@ -271,19 +335,176 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
 	call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
 }
 
+struct rcu_bh_torture_synchronize {
+	struct rcu_head head;
+	struct completion completion;
+};
+
+static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head)
+{
+	struct rcu_bh_torture_synchronize *rcu;
+
+	rcu = container_of(head, struct rcu_bh_torture_synchronize, head);
+	complete(&rcu->completion);
+}
+
+static void rcu_bh_torture_synchronize(void)
+{
+	struct rcu_bh_torture_synchronize rcu;
+
+	init_completion(&rcu.completion);
+	call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb);
+	wait_for_completion(&rcu.completion);
+}
+
 static struct rcu_torture_ops rcu_bh_ops = {
 	.init = NULL,
 	.cleanup = NULL,
 	.readlock = rcu_bh_torture_read_lock,
+	.readdelay = rcu_read_delay,  /* just reuse rcu's version. */
 	.readunlock = rcu_bh_torture_read_unlock,
 	.completed = rcu_bh_torture_completed,
 	.deferredfree = rcu_bh_torture_deferred_free,
+	.sync = rcu_bh_torture_synchronize,
 	.stats = NULL,
 	.name = "rcu_bh"
 };
 
+static struct rcu_torture_ops rcu_bh_sync_ops = {
+	.init = rcu_sync_torture_init,
+	.cleanup = NULL,
+	.readlock = rcu_bh_torture_read_lock,
+	.readdelay = rcu_read_delay,  /* just reuse rcu's version. */
+	.readunlock = rcu_bh_torture_read_unlock,
+	.completed = rcu_bh_torture_completed,
+	.deferredfree = rcu_sync_torture_deferred_free,
+	.sync = rcu_bh_torture_synchronize,
+	.stats = NULL,
+	.name = "rcu_bh_sync"
+};
+
+/*
+ * Definitions for srcu torture testing.
+ */
+
+static struct srcu_struct srcu_ctl;
+
+static void srcu_torture_init(void)
+{
+	init_srcu_struct(&srcu_ctl);
+	rcu_sync_torture_init();
+}
+
+static void srcu_torture_cleanup(void)
+{
+	synchronize_srcu(&srcu_ctl);
+	cleanup_srcu_struct(&srcu_ctl);
+}
+
+static int srcu_torture_read_lock(void)
+{
+	return srcu_read_lock(&srcu_ctl);
+}
+
+static void srcu_read_delay(struct rcu_random_state *rrsp)
+{
+	long delay;
+	const long uspertick = 1000000 / HZ;
+	const long longdelay = 10;
+
+	/* We want there to be long-running readers, but not all the time. */
+
+	delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick);
+	if (!delay)
+		schedule_timeout_interruptible(longdelay);
+}
+
+static void srcu_torture_read_unlock(int idx)
+{
+	srcu_read_unlock(&srcu_ctl, idx);
+}
+
+static int srcu_torture_completed(void)
+{
+	return srcu_batches_completed(&srcu_ctl);
+}
+
+static void srcu_torture_synchronize(void)
+{
+	synchronize_srcu(&srcu_ctl);
+}
+
+static int srcu_torture_stats(char *page)
+{
+	int cnt = 0;
+	int cpu;
+	int idx = srcu_ctl.completed & 0x1;
+
+	cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):",
+		       torture_type, TORTURE_FLAG, idx);
+	for_each_possible_cpu(cpu) {
+		cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu,
+			       per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx],
+			       per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]);
+	}
+	cnt += sprintf(&page[cnt], "\n");
+	return cnt;
+}
+
+static struct rcu_torture_ops srcu_ops = {
+	.init = srcu_torture_init,
+	.cleanup = srcu_torture_cleanup,
+	.readlock = srcu_torture_read_lock,
+	.readdelay = srcu_read_delay,
+	.readunlock = srcu_torture_read_unlock,
+	.completed = srcu_torture_completed,
+	.deferredfree = rcu_sync_torture_deferred_free,
+	.sync = srcu_torture_synchronize,
+	.stats = srcu_torture_stats,
+	.name = "srcu"
+};
+
+/*
+ * Definitions for sched torture testing.
+ */
+
+static int sched_torture_read_lock(void)
+{
+	preempt_disable();
+	return 0;
+}
+
+static void sched_torture_read_unlock(int idx)
+{
+	preempt_enable();
+}
+
+static int sched_torture_completed(void)
+{
+	return 0;
+}
+
+static void sched_torture_synchronize(void)
+{
+	synchronize_sched();
+}
+
+static struct rcu_torture_ops sched_ops = {
+	.init = rcu_sync_torture_init,
+	.cleanup = NULL,
+	.readlock = sched_torture_read_lock,
+	.readdelay = rcu_read_delay,  /* just reuse rcu's version. */
+	.readunlock = sched_torture_read_unlock,
+	.completed = sched_torture_completed,
+	.deferredfree = rcu_sync_torture_deferred_free,
+	.sync = sched_torture_synchronize,
+	.stats = NULL,
+	.name = "sched"
+};
+
 static struct rcu_torture_ops *torture_ops[] =
-	{ &rcu_ops, &rcu_bh_ops, NULL };
+	{ &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, &srcu_ops,
+	  &sched_ops, NULL };
 
 /*
  * RCU torture writer kthread.  Repeatedly substitutes a new structure
@@ -330,6 +551,30 @@ rcu_torture_writer(void *arg)
 }
 
 /*
+ * RCU torture fake writer kthread.  Repeatedly calls sync, with a random
+ * delay between calls.
+ */
+static int
+rcu_torture_fakewriter(void *arg)
+{
+	DEFINE_RCU_RANDOM(rand);
+
+	VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
+	set_user_nice(current, 19);
+
+	do {
+		schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
+		udelay(rcu_random(&rand) & 0x3ff);
+		cur_ops->sync();
+	} while (!kthread_should_stop() && !fullstop);
+
+	VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
+	while (!kthread_should_stop())
+		schedule_timeout_uninterruptible(1);
+	return 0;
+}
+
+/*
  * RCU torture reader kthread.  Repeatedly dereferences rcu_torture_current,
  * incrementing the corresponding element of the pipeline array.  The
  * counter in the element should never be greater than 1, otherwise, the
@@ -359,7 +604,7 @@ rcu_torture_reader(void *arg)
 		}
 		if (p->rtort_mbtest == 0)
 			atomic_inc(&n_rcu_torture_mberror);
-		udelay(rcu_random(&rand) & 0x7f);
+		cur_ops->readdelay(&rand);
 		preempt_disable();
 		pipe_count = p->rtort_pipe_count;
 		if (pipe_count > RCU_TORTURE_PIPE_LEN) {
@@ -483,7 +728,7 @@ static int rcu_idle_cpu;	/* Force all torture tasks off this CPU */
 /* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case
  * is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs.
  */
-void rcu_torture_shuffle_tasks(void)
+static void rcu_torture_shuffle_tasks(void)
 {
 	cpumask_t tmp_mask = CPU_MASK_ALL;
 	int i;
@@ -507,6 +752,12 @@ void rcu_torture_shuffle_tasks(void)
 				set_cpus_allowed(reader_tasks[i], tmp_mask);
 	}
 
+	if (fakewriter_tasks != NULL) {
+		for (i = 0; i < nfakewriters; i++)
+			if (fakewriter_tasks[i])
+				set_cpus_allowed(fakewriter_tasks[i], tmp_mask);
+	}
+
 	if (writer_task)
 		set_cpus_allowed(writer_task, tmp_mask);
 
@@ -540,11 +791,12 @@ rcu_torture_shuffle(void *arg)
 static inline void
 rcu_torture_print_module_parms(char *tag)
 {
-	printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d "
+	printk(KERN_ALERT "%s" TORTURE_FLAG
+		"--- %s: nreaders=%d nfakewriters=%d "
 		"stat_interval=%d verbose=%d test_no_idle_hz=%d "
 		"shuffle_interval = %d\n",
-		torture_type, tag, nrealreaders, stat_interval, verbose,
-		test_no_idle_hz, shuffle_interval);
+		torture_type, tag, nrealreaders, nfakewriters,
+		stat_interval, verbose, test_no_idle_hz, shuffle_interval);
 }
 
 static void
@@ -579,6 +831,19 @@ rcu_torture_cleanup(void)
 	}
 	rcu_torture_current = NULL;
 
+	if (fakewriter_tasks != NULL) {
+		for (i = 0; i < nfakewriters; i++) {
+			if (fakewriter_tasks[i] != NULL) {
+				VERBOSE_PRINTK_STRING(
+					"Stopping rcu_torture_fakewriter task");
+				kthread_stop(fakewriter_tasks[i]);
+			}
+			fakewriter_tasks[i] = NULL;
+		}
+		kfree(fakewriter_tasks);
+		fakewriter_tasks = NULL;
+	}
+
 	if (stats_task != NULL) {
 		VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
 		kthread_stop(stats_task);
@@ -666,7 +931,25 @@ rcu_torture_init(void)
 		writer_task = NULL;
 		goto unwind;
 	}
-	reader_tasks = kmalloc(nrealreaders * sizeof(reader_tasks[0]),
+	fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]),
+	                           GFP_KERNEL);
+	if (fakewriter_tasks == NULL) {
+		VERBOSE_PRINTK_ERRSTRING("out of memory");
+		firsterr = -ENOMEM;
+		goto unwind;
+	}
+	for (i = 0; i < nfakewriters; i++) {
+		VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task");
+		fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL,
+		                                  "rcu_torture_fakewriter");
+		if (IS_ERR(fakewriter_tasks[i])) {
+			firsterr = PTR_ERR(fakewriter_tasks[i]);
+			VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter");
+			fakewriter_tasks[i] = NULL;
+			goto unwind;
+		}
+	}
+	reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]),
 			       GFP_KERNEL);
 	if (reader_tasks == NULL) {
 		VERBOSE_PRINTK_ERRSTRING("out of memory");
diff --git a/kernel/srcu.c b/kernel/srcu.c
new file mode 100644
index 000000000000..3507cabe963b
--- /dev/null
+++ b/kernel/srcu.c
@@ -0,0 +1,258 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		Documentation/RCU/ *.txt
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/srcu.h>
+
+/**
+ * init_srcu_struct - initialize a sleep-RCU structure
+ * @sp: structure to initialize.
+ *
+ * Must invoke this on a given srcu_struct before passing that srcu_struct
+ * to any other function.  Each srcu_struct represents a separate domain
+ * of SRCU protection.
+ */
+int init_srcu_struct(struct srcu_struct *sp)
+{
+	sp->completed = 0;
+	mutex_init(&sp->mutex);
+	sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
+	return (sp->per_cpu_ref ? 0 : -ENOMEM);
+}
+
+/*
+ * srcu_readers_active_idx -- returns approximate number of readers
+ *	active on the specified rank of per-CPU counters.
+ */
+
+static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
+{
+	int cpu;
+	int sum;
+
+	sum = 0;
+	for_each_possible_cpu(cpu)
+		sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx];
+	return sum;
+}
+
+/**
+ * srcu_readers_active - returns approximate number of readers.
+ * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
+ *
+ * Note that this is not an atomic primitive, and can therefore suffer
+ * severe errors when invoked on an active srcu_struct.  That said, it
+ * can be useful as an error check at cleanup time.
+ */
+int srcu_readers_active(struct srcu_struct *sp)
+{
+	return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1);
+}
+
+/**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+void cleanup_srcu_struct(struct srcu_struct *sp)
+{
+	int sum;
+
+	sum = srcu_readers_active(sp);
+	WARN_ON(sum);  /* Leakage unless caller handles error. */
+	if (sum != 0)
+		return;
+	free_percpu(sp->per_cpu_ref);
+	sp->per_cpu_ref = NULL;
+}
+
+/**
+ * srcu_read_lock - register a new reader for an SRCU-protected structure.
+ * @sp: srcu_struct in which to register the new reader.
+ *
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct.  Must be called from process context.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int srcu_read_lock(struct srcu_struct *sp)
+{
+	int idx;
+
+	preempt_disable();
+	idx = sp->completed & 0x1;
+	barrier();  /* ensure compiler looks -once- at sp->completed. */
+	per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++;
+	srcu_barrier();  /* ensure compiler won't misorder critical section. */
+	preempt_enable();
+	return idx;
+}
+
+/**
+ * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
+ * @sp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct.  Note that this may well be a different
+ * CPU than that which was incremented by the corresponding srcu_read_lock().
+ * Must be called from process context.
+ */
+void srcu_read_unlock(struct srcu_struct *sp, int idx)
+{
+	preempt_disable();
+	srcu_barrier();  /* ensure compiler won't misorder critical section. */
+	per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
+	preempt_enable();
+}
+
+/**
+ * synchronize_srcu - wait for prior SRCU read-side critical-section completion
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Flip the completed counter, and wait for the old count to drain to zero.
+ * As with classic RCU, the updater must use some separate means of
+ * synchronizing concurrent updates.  Can block; must be called from
+ * process context.
+ *
+ * Note that it is illegal to call synchornize_srcu() from the corresponding
+ * SRCU read-side critical section; doing so will result in deadlock.
+ * However, it is perfectly legal to call synchronize_srcu() on one
+ * srcu_struct from some other srcu_struct's read-side critical section.
+ */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+	int idx;
+
+	idx = sp->completed;
+	mutex_lock(&sp->mutex);
+
+	/*
+	 * Check to see if someone else did the work for us while we were
+	 * waiting to acquire the lock.  We need -two- advances of
+	 * the counter, not just one.  If there was but one, we might have
+	 * shown up -after- our helper's first synchronize_sched(), thus
+	 * having failed to prevent CPU-reordering races with concurrent
+	 * srcu_read_unlock()s on other CPUs (see comment below).  So we
+	 * either (1) wait for two or (2) supply the second ourselves.
+	 */
+
+	if ((sp->completed - idx) >= 2) {
+		mutex_unlock(&sp->mutex);
+		return;
+	}
+
+	synchronize_sched();  /* Force memory barrier on all CPUs. */
+
+	/*
+	 * The preceding synchronize_sched() ensures that any CPU that
+	 * sees the new value of sp->completed will also see any preceding
+	 * changes to data structures made by this CPU.  This prevents
+	 * some other CPU from reordering the accesses in its SRCU
+	 * read-side critical section to precede the corresponding
+	 * srcu_read_lock() -- ensuring that such references will in
+	 * fact be protected.
+	 *
+	 * So it is now safe to do the flip.
+	 */
+
+	idx = sp->completed & 0x1;
+	sp->completed++;
+
+	synchronize_sched();  /* Force memory barrier on all CPUs. */
+
+	/*
+	 * At this point, because of the preceding synchronize_sched(),
+	 * all srcu_read_lock() calls using the old counters have completed.
+	 * Their corresponding critical sections might well be still
+	 * executing, but the srcu_read_lock() primitives themselves
+	 * will have finished executing.
+	 */
+
+	while (srcu_readers_active_idx(sp, idx))
+		schedule_timeout_interruptible(1);
+
+	synchronize_sched();  /* Force memory barrier on all CPUs. */
+
+	/*
+	 * The preceding synchronize_sched() forces all srcu_read_unlock()
+	 * primitives that were executing concurrently with the preceding
+	 * for_each_possible_cpu() loop to have completed by this point.
+	 * More importantly, it also forces the corresponding SRCU read-side
+	 * critical sections to have also completed, and the corresponding
+	 * references to SRCU-protected data items to be dropped.
+	 *
+	 * Note:
+	 *
+	 *	Despite what you might think at first glance, the
+	 *	preceding synchronize_sched() -must- be within the
+	 *	critical section ended by the following mutex_unlock().
+	 *	Otherwise, a task taking the early exit can race
+	 *	with a srcu_read_unlock(), which might have executed
+	 *	just before the preceding srcu_readers_active() check,
+	 *	and whose CPU might have reordered the srcu_read_unlock()
+	 *	with the preceding critical section.  In this case, there
+	 *	is nothing preventing the synchronize_sched() task that is
+	 *	taking the early exit from freeing a data structure that
+	 *	is still being referenced (out of order) by the task
+	 *	doing the srcu_read_unlock().
+	 *
+	 *	Alternatively, the comparison with "2" on the early exit
+	 *	could be changed to "3", but this increases synchronize_srcu()
+	 *	latency for bulk loads.  So the current code is preferred.
+	 */
+
+	mutex_unlock(&sp->mutex);
+}
+
+/**
+ * srcu_batches_completed - return batches completed.
+ * @sp: srcu_struct on which to report batch completion.
+ *
+ * Report the number of batches, correlated with, but not necessarily
+ * precisely the same as, the number of grace periods that have elapsed.
+ */
+
+long srcu_batches_completed(struct srcu_struct *sp)
+{
+	return sp->completed;
+}
+
+EXPORT_SYMBOL_GPL(init_srcu_struct);
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(srcu_read_lock);
+EXPORT_SYMBOL_GPL(srcu_read_unlock);
+EXPORT_SYMBOL_GPL(synchronize_srcu);
+EXPORT_SYMBOL_GPL(srcu_batches_completed);
+EXPORT_SYMBOL_GPL(srcu_readers_active);
diff --git a/kernel/sys.c b/kernel/sys.c
index 2314867ae34f..98489d82801b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -153,7 +153,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
 
 /*
  *	Atomic notifier chain routines.  Registration and unregistration
- *	use a mutex, and call_chain is synchronized by RCU (no locks).
+ *	use a spinlock, and call_chain is synchronized by RCU (no locks).
  */
 
 /**
@@ -401,6 +401,129 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh,
 
 EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
 
+/*
+ *	SRCU notifier chain routines.    Registration and unregistration
+ *	use a mutex, and call_chain is synchronized by SRCU (no locks).
+ */
+
+/**
+ *	srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to an SRCU notifier chain.
+ *	Must be called in process context.
+ *
+ *	Currently always returns zero.
+ */
+
+int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call mutex_lock().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_register(&nh->head, n);
+
+	mutex_lock(&nh->mutex);
+	ret = notifier_chain_register(&nh->head, n);
+	mutex_unlock(&nh->mutex);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
+
+/**
+ *	srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from an SRCU notifier chain.
+ *	Must be called from process context.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call mutex_lock().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_unregister(&nh->head, n);
+
+	mutex_lock(&nh->mutex);
+	ret = notifier_chain_unregister(&nh->head, n);
+	mutex_unlock(&nh->mutex);
+	synchronize_srcu(&nh->srcu);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
+
+/**
+ *	srcu_notifier_call_chain - Call functions in an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in a process context, so they are allowed to block.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then srcu_notifier_call_chain
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+
+int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	int ret;
+	int idx;
+
+	idx = srcu_read_lock(&nh->srcu);
+	ret = notifier_call_chain(&nh->head, val, v);
+	srcu_read_unlock(&nh->srcu, idx);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
+
+/**
+ *	srcu_init_notifier_head - Initialize an SRCU notifier head
+ *	@nh: Pointer to head of the srcu notifier chain
+ *
+ *	Unlike other sorts of notifier heads, SRCU notifier heads require
+ *	dynamic initialization.  Be sure to call this routine before
+ *	calling any of the other SRCU notifier routines for this head.
+ *
+ *	If an SRCU notifier head is deallocated, it must first be cleaned
+ *	up by calling srcu_cleanup_notifier_head().  Otherwise the head's
+ *	per-cpu data (used by the SRCU mechanism) will leak.
+ */
+
+void srcu_init_notifier_head(struct srcu_notifier_head *nh)
+{
+	mutex_init(&nh->mutex);
+	if (init_srcu_struct(&nh->srcu) < 0)
+		BUG();
+	nh->head = NULL;
+}
+
+EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
+
 /**
  *	register_reboot_notifier - Register function to be called at reboot time
  *	@nb: Info about notifier function to be called