From 151766fce8bee0e3e6076c8b829f9fcc0a2412ae Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Sat, 28 Apr 2012 14:30:40 +0800
Subject: Revert "x86/platform: Add a wallclock_init func to x86_platforms ops"

This reverts commit cf8ff6b6ab0e99dd3058852f4ec76a6140abadec.

Just found this commit is a function duplicatation of commit 6b617e22
"x86/platform: Add a wallclock_init func to x86_init.timers ops".
Let's revert it and sorry for the noise.

Signed-off-by: Feng Tang <feng.tang@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Alan Cox <alan@linux.intel.com>
Cc: Dirk Brandewie <dirk.brandewie@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/x86_init.h | 2 --
 arch/x86/kernel/setup.c         | 2 --
 arch/x86/kernel/x86_init.c      | 2 --
 3 files changed, 6 deletions(-)

diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index c090af10ac7d..42d2ae18dab2 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -156,7 +156,6 @@ struct x86_cpuinit_ops {
 /**
  * struct x86_platform_ops - platform specific runtime functions
  * @calibrate_tsc:		calibrate TSC
- * @wallclock_init:		init the wallclock device
  * @get_wallclock:		get time from HW clock like RTC etc.
  * @set_wallclock:		set time back to HW clock
  * @is_untracked_pat_range	exclude from PAT logic
@@ -167,7 +166,6 @@ struct x86_cpuinit_ops {
  */
 struct x86_platform_ops {
 	unsigned long (*calibrate_tsc)(void);
-	void (*wallclock_init)(void);
 	unsigned long (*get_wallclock)(void);
 	int (*set_wallclock)(unsigned long nowtime);
 	void (*iommu_shutdown)(void);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 366c688d619e..58a07b10812c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1027,8 +1027,6 @@ void __init setup_arch(char **cmdline_p)
 
 	x86_init.timers.wallclock_init();
 
-	x86_platform.wallclock_init();
-
 	mcheck_init();
 
 	arch_init_ideal_nops();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 35c5e543f550..9f3167e891ef 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -29,7 +29,6 @@ void __init x86_init_uint_noop(unsigned int unused) { }
 void __init x86_init_pgd_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
-void wallclock_init_noop(void) { }
 
 /*
  * The platform setup functions are preset with the default functions
@@ -101,7 +100,6 @@ static int default_i8042_detect(void) { return 1; };
 
 struct x86_platform_ops x86_platform = {
 	.calibrate_tsc			= native_calibrate_tsc,
-	.wallclock_init			= wallclock_init_noop,
 	.get_wallclock			= mach_get_cmos_time,
 	.set_wallclock			= mach_set_rtc_mmss,
 	.iommu_shutdown			= iommu_shutdown_noop,
-- 
cgit v1.2.3


From f841d792e38f75f5e25b0b66f7b5d235d180a735 Mon Sep 17 00:00:00 2001
From: Jiang Liu <liuj97@gmail.com>
Date: Fri, 30 Mar 2012 23:11:35 +0800
Subject: x86: Return IRQ_SET_MASK_OK_NOCOPY from irq affinity functions

The interrupt chip irq_set_affinity() functions copy the affinity mask
to irq_data->affinity but return 0, i.e. IRQ_SET_MASK_OK.
IRQ_SET_MASK_OK causes the core code to do another redundant copy.

Return IRQ_SET_MASK_OK_NOCOPY to avoid this.

Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Cliff Wickman <cpw@sgi.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Keping Chen <chenkeping@huawei.com>
Link: http://lkml.kernel.org/r/1333120296-13563-4-git-send-email-jiang.liu@huawei.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/io_apic.c | 9 +++++----
 arch/x86/platform/uv/uv_irq.c  | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ac96561d1a99..bce2001b2644 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2270,6 +2270,7 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 		/* Only the high 8 bits are valid. */
 		dest = SET_APIC_LOGICAL_ID(dest);
 		__target_IO_APIC_irq(irq, dest, data->chip_data);
+		ret = IRQ_SET_MASK_OK_NOCOPY;
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	return ret;
@@ -3092,7 +3093,7 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 
 	__write_msi_msg(data->msi_desc, &msg);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 #endif /* CONFIG_SMP */
 
@@ -3214,7 +3215,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
 
 	dmar_msi_write(irq, &msg);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
 #endif /* CONFIG_SMP */
@@ -3267,7 +3268,7 @@ static int hpet_msi_set_affinity(struct irq_data *data,
 
 	hpet_msi_write(data->handler_data, &msg);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
 #endif /* CONFIG_SMP */
@@ -3340,7 +3341,7 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 		return -1;
 
 	target_ht_irq(data->irq, dest, cfg->vector);
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
 #endif
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index f25c2765a5c9..a22c41656b52 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -222,7 +222,7 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
 /*
-- 
cgit v1.2.3


From 4ee0444bebf3d0399c93841d98826ade193e6330 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Tue, 29 May 2012 15:15:56 +0800
Subject: hexagon: SMP: Remove call to ipi_call_lock()/ipi_call_unlock()

ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock
protects only the call_function data structure itself, but it's
completely unrelated to cpu_online_mask. The mask to which the IPIs
are sent is calculated before call_function.lock is taken in
smp_call_function_many(), so the locking around set_cpu_online() is
pointless and can be removed.

[ tglx: Massaged changelog ]

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: ralf@linux-mips.org
Cc: sshtylyov@mvista.com
Cc: david.daney@cavium.com
Cc: nikunj@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: axboe@kernel.dk
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: linux-hexagon@vger.kernel.org
Link: http://lkml.kernel.org/r/1338275765-3217-2-git-send-email-yong.zhang0@gmail.com
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/hexagon/kernel/smp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index f7264621e58d..149fbefc1a4d 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -180,9 +180,7 @@ void __cpuinit start_secondary(void)
 
 	notify_cpu_starting(cpu);
 
-	ipi_call_lock();
 	set_cpu_online(cpu, true);
-	ipi_call_unlock();
 
 	local_irq_enable();
 
-- 
cgit v1.2.3


From 4cbd19fcf9d49f28766415b96eb6201bc263d817 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Tue, 29 May 2012 15:15:57 +0800
Subject: mn10300: SMP: Remove call to ipi_call_lock()/ipi_call_unlock()

ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock
protects only the call_function data structure itself, but it's
completely unrelated to cpu_online_mask. The mask to which the IPIs
are sent is calculated before call_function.lock is taken in
smp_call_function_many(), so the locking around set_cpu_online() is
pointless and can be removed.

[ tglx: Massaged changelog ]

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: ralf@linux-mips.org
Cc: sshtylyov@mvista.com
Cc: david.daney@cavium.com
Cc: nikunj@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: axboe@kernel.dk
Cc: peterz@infradead.org
Cc: David Howells <dhowells@redhat.com>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Cc: linux-am33-list@redhat.com
Link: http://lkml.kernel.org/r/1338275765-3217-3-git-send-email-yong.zhang0@gmail.com
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/mn10300/kernel/smp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index 090d35d36973..e62c223e4c45 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -876,9 +876,7 @@ static void __init smp_online(void)
 
 	notify_cpu_starting(cpu);
 
-	ipi_call_lock();
 	set_cpu_online(cpu, true);
-	ipi_call_unlock();
 
 	local_irq_enable();
 }
-- 
cgit v1.2.3


From ac4216d6922f90c459d531e051c832dadde3a8f7 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Tue, 29 May 2012 15:15:58 +0800
Subject: parisc: Smp: remove call to ipi_call_lock()/ipi_call_unlock()

ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock
protects only the call_function data structure itself, but it's
completely unrelated to cpu_online_mask. The mask to which the IPIs
are sent is calculated before call_function.lock is taken in
smp_call_function_many(), so the locking around set_cpu_online() is
pointless and can be removed.

[ tglx: Massaged changelog ]

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: ralf@linux-mips.org
Cc: sshtylyov@mvista.com
Cc: david.daney@cavium.com
Cc: nikunj@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: axboe@kernel.dk
Cc: peterz@infradead.org
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: Helge Deller <deller@gmx.de>
Cc: linux-parisc@vger.kernel.org
Link: http://lkml.kernel.org/r/1338275765-3217-4-git-send-email-yong.zhang0@gmail.com
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/parisc/kernel/smp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index a47828d31fe6..6266730efd61 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -300,9 +300,7 @@ smp_cpu_init(int cpunum)
 
 	notify_cpu_starting(cpunum);
 
-	ipi_call_lock();
 	set_cpu_online(cpunum, true);
-	ipi_call_unlock();
 
 	/* Initialise the idle task for this CPU */
 	atomic_inc(&init_mm.mm_count);
-- 
cgit v1.2.3


From 46ce7fbfdbe0be6aaf0e816331aac08a74a00e5a Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Tue, 29 May 2012 15:15:59 +0800
Subject: S390: Smp: remove call to ipi_call_lock()/ipi_call_unlock()

ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock
protects only the call_function data structure itself, but it's
completely unrelated to cpu_online_mask. The mask to which the IPIs
are sent is calculated before call_function.lock is taken in
smp_call_function_many(), so the locking around set_cpu_online() is
pointless and can be removed.

[ tglx: Massaged changelog ]

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: ralf@linux-mips.org
Cc: sshtylyov@mvista.com
Cc: david.daney@cavium.com
Cc: nikunj@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: axboe@kernel.dk
Cc: peterz@infradead.org
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: linux390@de.ibm.com
Cc: linux-s390@vger.kernel.org
Link: http://lkml.kernel.org/r/1338275765-3217-5-git-send-email-yong.zhang0@gmail.com
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/s390/kernel/smp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 15cca26ccb6c..8dca9c248ac7 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -717,9 +717,7 @@ static void __cpuinit smp_start_secondary(void *cpuvoid)
 	init_cpu_vtimer();
 	pfault_init();
 	notify_cpu_starting(smp_processor_id());
-	ipi_call_lock();
 	set_cpu_online(smp_processor_id(), true);
-	ipi_call_unlock();
 	local_irq_enable();
 	/* cpu_idle will call schedule for us */
 	cpu_idle();
-- 
cgit v1.2.3


From 8efdfc3a4ed009c978dab6609d15fb958e7cff12 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Tue, 29 May 2012 15:16:00 +0800
Subject: tile: SMP: Remove call to ipi_call_lock()/ipi_call_unlock()

ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock
protects only the call_function data structure itself, but it's
completely unrelated to cpu_online_mask. The mask to which the IPIs
are sent is calculated before call_function.lock is taken in
smp_call_function_many(), so the locking around set_cpu_online() is
pointless and can be removed.

[ tglx: Massaged changelog ]

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: ralf@linux-mips.org
Cc: sshtylyov@mvista.com
Cc: david.daney@cavium.com
Cc: nikunj@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: axboe@kernel.dk
Cc: peterz@infradead.org
Cc: Chris Metcalf <cmetcalf@tilera.com>
Link: http://lkml.kernel.org/r/1338275765-3217-6-git-send-email-yong.zhang0@gmail.com
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/tile/kernel/smpboot.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 84873fbe8f27..e686c5ac90be 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -198,17 +198,7 @@ void __cpuinit online_secondary(void)
 
 	notify_cpu_starting(smp_processor_id());
 
-	/*
-	 * We need to hold call_lock, so there is no inconsistency
-	 * between the time smp_call_function() determines number of
-	 * IPI recipients, and the time when the determination is made
-	 * for which cpus receive the IPI. Holding this
-	 * lock helps us to not include this cpu in a currently in progress
-	 * smp_call_function().
-	 */
-	ipi_call_lock();
 	set_cpu_online(smp_processor_id(), 1);
-	ipi_call_unlock();
 	__get_cpu_var(cpu_state) = CPU_ONLINE;
 
 	/* Set up tile-specific state for this cpu. */
-- 
cgit v1.2.3


From 3b6f70fd7dd4e19fc674ec99e389bf0da5589525 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Tue, 29 May 2012 15:16:01 +0800
Subject: x86-smp-remove-call-to-ipi_call_lock-ipi_call_unlock

ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock
protects only the call_function data structure itself, but it's
completely unrelated to cpu_online_mask. The mask to which the IPIs
are sent is calculated before call_function.lock is taken in
smp_call_function_many(), so the locking around set_cpu_online() is
pointless and can be removed.

[ tglx: Massaged changelog ]

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: ralf@linux-mips.org
Cc: sshtylyov@mvista.com
Cc: david.daney@cavium.com
Cc: nikunj@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: axboe@kernel.dk
Cc: peterz@infradead.org
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: http://lkml.kernel.org/r/1338275765-3217-7-git-send-email-yong.zhang0@gmail.com
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/smpboot.c | 9 ---------
 arch/x86/xen/smp.c        | 2 --
 2 files changed, 11 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f56f96da77f5..b2fd28ff84b5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -255,22 +255,13 @@ notrace static void __cpuinit start_secondary(void *unused)
 	check_tsc_sync_target();
 
 	/*
-	 * We need to hold call_lock, so there is no inconsistency
-	 * between the time smp_call_function() determines number of
-	 * IPI recipients, and the time when the determination is made
-	 * for which cpus receive the IPI. Holding this
-	 * lock helps us to not include this cpu in a currently in progress
-	 * smp_call_function().
-	 *
 	 * We need to hold vector_lock so there the set of online cpus
 	 * does not change while we are assigning vectors to cpus.  Holding
 	 * this lock ensures we don't half assign or remove an irq from a cpu.
 	 */
-	ipi_call_lock();
 	lock_vector_lock();
 	set_cpu_online(smp_processor_id(), true);
 	unlock_vector_lock();
-	ipi_call_unlock();
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 	x86_platform.nmi_init();
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index afb250d22a6b..f58dca7a6e52 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -80,9 +80,7 @@ static void __cpuinit cpu_bringup(void)
 
 	notify_cpu_starting(cpu);
 
-	ipi_call_lock();
 	set_cpu_online(cpu, true);
-	ipi_call_unlock();
 
 	this_cpu_write(cpu_state, CPU_ONLINE);
 
-- 
cgit v1.2.3


From 459165e25030c0023cb54f73c14261a3d2f4a244 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Tue, 29 May 2012 15:16:02 +0800
Subject: ia64: SMP: Remove call to ipi_call_lock_irq()/ipi_call_unlock_irq()

ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock
protects only the call_function data structure itself, but it's
completely unrelated to cpu_online_mask. The mask to which the IPIs
are sent is calculated before call_function.lock is taken in
smp_call_function_many(), so the locking around set_cpu_online() is
pointless and can be removed.

[ tglx: Massaged changelog ]

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: ralf@linux-mips.org
Cc: sshtylyov@mvista.com
Cc: david.daney@cavium.com
Cc: nikunj@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: axboe@kernel.dk
Cc: peterz@infradead.org
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: linux-ia64@vger.kernel.org
Link: http://lkml.kernel.org/r/1338275765-3217-8-git-send-email-yong.zhang0@gmail.com
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/kernel/smpboot.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 1113b8aba07f..963d2db53bfa 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -382,7 +382,6 @@ smp_callin (void)
 	set_numa_node(cpu_to_node_map[cpuid]);
 	set_numa_mem(local_memory_node(cpu_to_node_map[cpuid]));
 
-	ipi_call_lock_irq();
 	spin_lock(&vector_lock);
 	/* Setup the per cpu irq handling data structures */
 	__setup_vector_irq(cpuid);
@@ -390,7 +389,6 @@ smp_callin (void)
 	set_cpu_online(cpuid, true);
 	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
 	spin_unlock(&vector_lock);
-	ipi_call_unlock_irq();
 
 	smp_setup_percpu_timer();
 
-- 
cgit v1.2.3


From bc6833009583bd5b096ef7aa2bb006854a5a2dce Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Tue, 29 May 2012 16:27:33 +0800
Subject: SPARC: SMP: Remove call to ipi_call_lock_irq()/ipi_call_unlock_irq()

ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock
protects only the call_function data structure itself, but it's
completely unrelated to cpu_online_mask. The mask to which the IPIs
are sent is calculated before call_function.lock is taken in
smp_call_function_many(), so the locking around set_cpu_online() is
pointless and can be removed.

Delay irq enable to after set_cpu_online().

[ tglx: Massaged changelog ]

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: ralf@linux-mips.org
Cc: sshtylyov@mvista.com
Cc: david.daney@cavium.com
Cc: nikunj@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: axboe@kernel.dk
Cc: peterz@infradead.org
Cc: sparclinux@vger.kernel.org
Link: http://lkml.kernel.org/r/20120529082732.GA4250@zhy
Acked-by: "David S. Miller" <davem@davemloft.net>
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/sparc/kernel/smp_64.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index f591598d92f6..781bcb10b8bd 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -103,8 +103,6 @@ void __cpuinit smp_callin(void)
 	if (cheetah_pcache_forced_on)
 		cheetah_enable_pcache();
 
-	local_irq_enable();
-
 	callin_flag = 1;
 	__asm__ __volatile__("membar #Sync\n\t"
 			     "flush  %%g6" : : : "memory");
@@ -124,9 +122,8 @@ void __cpuinit smp_callin(void)
 	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
 		rmb();
 
-	ipi_call_lock_irq();
 	set_cpu_online(cpuid, true);
-	ipi_call_unlock_irq();
+	local_irq_enable();
 
 	/* idle thread is expected to have preempt disabled */
 	preempt_disable();
@@ -1308,9 +1305,7 @@ int __cpu_disable(void)
 	mdelay(1);
 	local_irq_disable();
 
-	ipi_call_lock();
 	set_cpu_online(cpu, false);
-	ipi_call_unlock();
 
 	cpu_map_rebuild();
 
-- 
cgit v1.2.3


From 72ec61a9e8347375e37c05e23511173d8451c3d4 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Tue, 29 May 2012 15:16:04 +0800
Subject: POWERPC: Smp: remove call to ipi_call_lock()/ipi_call_unlock()

ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock
protects only the call_function data structure itself, but it's
completely unrelated to cpu_online_mask. The mask to which the IPIs
are sent is calculated before call_function.lock is taken in
smp_call_function_many(), so the locking around set_cpu_online() is
pointless and can be removed.

[ tglx: Massaged changelog ]

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: ralf@linux-mips.org
Cc: sshtylyov@mvista.com
Cc: david.daney@cavium.com
Cc: nikunj@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: axboe@kernel.dk
Cc: peterz@infradead.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1338275765-3217-10-git-send-email-yong.zhang0@gmail.com
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/powerpc/kernel/smp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e4cb34322de4..e1417c42155c 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused)
 	if (system_state == SYSTEM_RUNNING)
 		vdso_data->processorCount++;
 #endif
-	ipi_call_lock();
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
 	/* Update sibling maps */
@@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused)
 		of_node_put(np);
 	}
 	of_node_put(l2_cache);
-	ipi_call_unlock();
 
 	local_irq_enable();
 
-- 
cgit v1.2.3


From 8feb8e896d77439146d2e2ab3d0ab55bb5baf5fc Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Tue, 29 May 2012 15:16:05 +0800
Subject: smp: Remove ipi_call_lock[_irq]()/ipi_call_unlock[_irq]()

There is no user of those APIs anymore, just remove it.

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: ralf@linux-mips.org
Cc: sshtylyov@mvista.com
Cc: david.daney@cavium.com
Cc: nikunj@linux.vnet.ibm.com
Cc: paulmck@linux.vnet.ibm.com
Cc: axboe@kernel.dk
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1338275765-3217-11-git-send-email-yong.zhang0@gmail.com
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/smp.h |  4 ----
 kernel/smp.c        | 20 --------------------
 2 files changed, 24 deletions(-)

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 717fb746c9a8..a34d4f15430d 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -90,10 +90,6 @@ void kick_all_cpus_sync(void);
 void __init call_function_init(void);
 void generic_smp_call_function_single_interrupt(void);
 void generic_smp_call_function_interrupt(void);
-void ipi_call_lock(void);
-void ipi_call_unlock(void);
-void ipi_call_lock_irq(void);
-void ipi_call_unlock_irq(void);
 #else
 static inline void call_function_init(void) { }
 #endif
diff --git a/kernel/smp.c b/kernel/smp.c
index d0ae5b24875e..29dd40a9f2f4 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -581,26 +581,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait)
 	return 0;
 }
 EXPORT_SYMBOL(smp_call_function);
-
-void ipi_call_lock(void)
-{
-	raw_spin_lock(&call_function.lock);
-}
-
-void ipi_call_unlock(void)
-{
-	raw_spin_unlock(&call_function.lock);
-}
-
-void ipi_call_lock_irq(void)
-{
-	raw_spin_lock_irq(&call_function.lock);
-}
-
-void ipi_call_unlock_irq(void)
-{
-	raw_spin_unlock_irq(&call_function.lock);
-}
 #endif /* USE_GENERIC_SMP_HELPERS */
 
 /* Setup configured maximum number of CPUs to activate */
-- 
cgit v1.2.3


From 43cc7e86f3200b094e2960b732623aeec00b482d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 15 May 2012 17:26:16 +0200
Subject: smp: Remove num_booting_cpus()

No users.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/m32r/include/asm/smp.h | 5 -----
 arch/x86/include/asm/smp.h  | 5 -----
 include/linux/smp.h         | 1 -
 3 files changed, 11 deletions(-)

diff --git a/arch/m32r/include/asm/smp.h b/arch/m32r/include/asm/smp.h
index cf7829a61551..c689b828dfe2 100644
--- a/arch/m32r/include/asm/smp.h
+++ b/arch/m32r/include/asm/smp.h
@@ -79,11 +79,6 @@ static __inline__ int cpu_number_map(int cpu)
 	return cpu;
 }
 
-static __inline__ unsigned int num_booting_cpus(void)
-{
-	return cpumask_weight(&cpu_callout_map);
-}
-
 extern void smp_send_timer(void);
 extern unsigned long send_IPI_mask_phys(const cpumask_t*, int, int);
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index f48394513c37..2ffa95dc2333 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -169,11 +169,6 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
 void smp_store_cpu_info(int id);
 #define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
 
-/* We don't mark CPUs online until __cpu_up(), so we need another measure */
-static inline int num_booting_cpus(void)
-{
-	return cpumask_weight(cpu_callout_mask);
-}
 #else /* !CONFIG_SMP */
 #define wbinvd_on_cpu(cpu)     wbinvd()
 static inline int wbinvd_on_all_cpus(void)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index a34d4f15430d..dd6f06be3c9f 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -177,7 +177,6 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info)
 	} while (0)
 
 static inline void smp_send_reschedule(int cpu) { }
-#define num_booting_cpus()			1
 #define smp_prepare_boot_cpu()			do {} while (0)
 #define smp_call_function_many(mask, func, info, wait) \
 			(up_smp_call_function(func, info))
-- 
cgit v1.2.3


From 7db971b235480849aa5b9209b67b62e987b3181b Mon Sep 17 00:00:00 2001
From: Ido Yariv <ido@wizery.com>
Date: Sun, 3 Jun 2012 01:11:34 +0300
Subject: x86/platform: Introduce APIC post-initialization callback

Some subarchitectures (such as vSMP) need to slightly adjust the
underlying APIC structure. Add an APIC post-initialization callback
to 'struct x86_platform_ops' for this purpose and use it for
adjusting the APIC structure on vSMP systems.

Signed-off-by: Ido Yariv <ido@wizery.com>
Acked-by: Shai Fultheim <shai@scalemp.com>
Link: http://lkml.kernel.org/r/1338675095-27260-1-git-send-email-ido@wizery.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/x86_init.h |  2 ++
 arch/x86/kernel/apic/probe_32.c |  3 +++
 arch/x86/kernel/apic/probe_64.c | 11 ++---------
 arch/x86/kernel/vsmp_64.c       | 13 +++++++++++++
 4 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index c090af10ac7d..c377d9ccb696 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -164,6 +164,7 @@ struct x86_cpuinit_ops {
  * @i8042_detect		pre-detect if i8042 controller exists
  * @save_sched_clock_state:	save state for sched_clock() on suspend
  * @restore_sched_clock_state:	restore state for sched_clock() on resume
+ * @apic_post_init:		adjust apic if neeeded
  */
 struct x86_platform_ops {
 	unsigned long (*calibrate_tsc)(void);
@@ -177,6 +178,7 @@ struct x86_platform_ops {
 	int (*i8042_detect)(void);
 	void (*save_sched_clock_state)(void);
 	void (*restore_sched_clock_state)(void);
+	void (*apic_post_init)(void);
 };
 
 struct pci_dev;
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1b291da09e60..8616d5198e16 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -208,6 +208,9 @@ void __init default_setup_apic_routing(void)
 
 	if (apic->setup_apic_routing)
 		apic->setup_apic_routing();
+
+	if (x86_platform.apic_post_init)
+		x86_platform.apic_post_init();
 }
 
 void __init generic_apic_probe(void)
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 3fe986698929..1793dba7a741 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -23,11 +23,6 @@
 #include <asm/ipi.h>
 #include <asm/setup.h>
 
-static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
-{
-	return hard_smp_processor_id() >> index_msb;
-}
-
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
  */
@@ -48,10 +43,8 @@ void __init default_setup_apic_routing(void)
 		}
 	}
 
-	if (is_vsmp_box()) {
-		/* need to update phys_pkg_id */
-		apic->phys_pkg_id = apicid_phys_pkg_id;
-	}
+	if (x86_platform.apic_post_init)
+		x86_platform.apic_post_init();
 }
 
 /* Same for both flat and physical. */
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 8eeb55a551b4..59eea855f451 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -187,12 +187,25 @@ static void __init vsmp_cap_cpus(void)
 #endif
 }
 
+static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+	return hard_smp_processor_id() >> index_msb;
+}
+
+static void vsmp_apic_post_init(void)
+{
+	/* need to update phys_pkg_id */
+	apic->phys_pkg_id = apicid_phys_pkg_id;
+}
+
 void __init vsmp_init(void)
 {
 	detect_vsmp_box();
 	if (!is_vsmp_box())
 		return;
 
+	x86_platform.apic_post_init = vsmp_apic_post_init;
+
 	vsmp_cap_cpus();
 
 	set_vsmp_pv_ops();
-- 
cgit v1.2.3


From c767a54ba0657e52e6edaa97cbe0b0a8bf1c1655 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 May 2012 19:50:07 -0700
Subject: x86/debug: Add KERN_<LEVEL> to bare printks, convert printks to
 pr_<level>

Use a more current logging style:

 - Bare printks should have a KERN_<LEVEL> for consistency's sake
 - Add pr_fmt where appropriate
 - Neaten some macro definitions
 - Convert some Ok output to OK
 - Use "%s: ", __func__ in pr_fmt for summit
 - Convert some printks to pr_<level>

Message output is not identical in all cases.

Signed-off-by: Joe Perches <joe@perches.com>
Cc: levinsasha928@gmail.com
Link: http://lkml.kernel.org/r/1337655007.24226.10.camel@joe2Laptop
[ merged two similar patches, tidied up the changelog ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/floppy.h          |  2 +-
 arch/x86/include/asm/pci_x86.h         |  8 ++-
 arch/x86/include/asm/pgtable-2level.h  |  4 +-
 arch/x86/include/asm/pgtable-3level.h  |  6 +--
 arch/x86/include/asm/pgtable_64.h      |  8 +--
 arch/x86/kernel/alternative.c          | 17 +++---
 arch/x86/kernel/amd_nb.c               | 10 ++--
 arch/x86/kernel/apic/io_apic.c         | 38 ++++++-------
 arch/x86/kernel/apic/summit_32.c       | 22 ++++----
 arch/x86/kernel/apm_32.c               | 29 +++++-----
 arch/x86/kernel/cpu/bugs.c             | 20 +++----
 arch/x86/kernel/cpu/mcheck/mce.c       | 22 ++++----
 arch/x86/kernel/cpu/perf_event_intel.c | 14 ++---
 arch/x86/kernel/dumpstack.c            |  4 +-
 arch/x86/kernel/dumpstack_32.c         | 24 ++++-----
 arch/x86/kernel/dumpstack_64.c         | 20 +++----
 arch/x86/kernel/irq.c                  |  4 +-
 arch/x86/kernel/module.c               | 32 ++++++-----
 arch/x86/kernel/pci-calgary_64.c       | 34 ++++++------
 arch/x86/kernel/process.c              | 34 ++++++------
 arch/x86/kernel/process_64.c           |  8 +--
 arch/x86/kernel/reboot.c               | 14 +++--
 arch/x86/kernel/signal.c               |  5 +-
 arch/x86/kernel/smpboot.c              | 97 ++++++++++++++++------------------
 arch/x86/kernel/traps.c                | 19 +++----
 arch/x86/kernel/tsc.c                  | 50 +++++++++---------
 arch/x86/kernel/vm86_32.c              |  6 ++-
 arch/x86/kernel/vsyscall_64.c          | 17 +++---
 arch/x86/kernel/xsave.c                | 12 +++--
 29 files changed, 301 insertions(+), 279 deletions(-)

diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index dbe82a5c5eac..d3d74698dce9 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -99,7 +99,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
 		virtual_dma_residue += virtual_dma_count;
 		virtual_dma_count = 0;
 #ifdef TRACE_FLPY_INT
-		printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n",
+		printk(KERN_DEBUG "count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n",
 		       virtual_dma_count, virtual_dma_residue, calls, bytes,
 		       dma_wait);
 		calls = 0;
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index b3a531746026..5ad24a89b19b 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -7,9 +7,13 @@
 #undef DEBUG
 
 #ifdef DEBUG
-#define DBG(x...) printk(x)
+#define DBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
 #else
-#define DBG(x...)
+#define DBG(fmt, ...)				\
+do {						\
+	if (0)					\
+		printk(fmt, ##__VA_ARGS__);	\
+} while (0)
 #endif
 
 #define PCI_PROBE_BIOS		0x0001
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 98391db840c6..f2b489cf1602 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -2,9 +2,9 @@
 #define _ASM_X86_PGTABLE_2LEVEL_H
 
 #define pte_ERROR(e) \
-	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
+	pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low)
 #define pgd_ERROR(e) \
-	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+	pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e))
 
 /*
  * Certain architectures need to do special things when PTEs
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 43876f16caf1..f824cfbaa9d4 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -9,13 +9,13 @@
  */
 
 #define pte_ERROR(e)							\
-	printk("%s:%d: bad pte %p(%08lx%08lx).\n",			\
+	pr_err("%s:%d: bad pte %p(%08lx%08lx)\n",			\
 	       __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low)
 #define pmd_ERROR(e)							\
-	printk("%s:%d: bad pmd %p(%016Lx).\n",				\
+	pr_err("%s:%d: bad pmd %p(%016Lx)\n",				\
 	       __FILE__, __LINE__, &(e), pmd_val(e))
 #define pgd_ERROR(e)							\
-	printk("%s:%d: bad pgd %p(%016Lx).\n",				\
+	pr_err("%s:%d: bad pgd %p(%016Lx)\n",				\
 	       __FILE__, __LINE__, &(e), pgd_val(e))
 
 /* Rules for using set_pte: the pte being assigned *must* be
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 975f709e09ae..8251be02301e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -26,16 +26,16 @@ extern pgd_t init_level4_pgt[];
 extern void paging_init(void);
 
 #define pte_ERROR(e)					\
-	printk("%s:%d: bad pte %p(%016lx).\n",		\
+	pr_err("%s:%d: bad pte %p(%016lx)\n",		\
 	       __FILE__, __LINE__, &(e), pte_val(e))
 #define pmd_ERROR(e)					\
-	printk("%s:%d: bad pmd %p(%016lx).\n",		\
+	pr_err("%s:%d: bad pmd %p(%016lx)\n",		\
 	       __FILE__, __LINE__, &(e), pmd_val(e))
 #define pud_ERROR(e)					\
-	printk("%s:%d: bad pud %p(%016lx).\n",		\
+	pr_err("%s:%d: bad pud %p(%016lx)\n",		\
 	       __FILE__, __LINE__, &(e), pud_val(e))
 #define pgd_ERROR(e)					\
-	printk("%s:%d: bad pgd %p(%016lx).\n",		\
+	pr_err("%s:%d: bad pgd %p(%016lx)\n",		\
 	       __FILE__, __LINE__, &(e), pgd_val(e))
 
 struct mm_struct;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 1f84794f0759..1729d720299a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) "SMP alternatives: " fmt
+
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/mutex.h>
@@ -63,8 +65,11 @@ static int __init setup_noreplace_paravirt(char *str)
 __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #endif
 
-#define DPRINTK(fmt, args...) if (debug_alternative) \
-	printk(KERN_DEBUG fmt, args)
+#define DPRINTK(fmt, ...)				\
+do {							\
+	if (debug_alternative)				\
+		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
+} while (0)
 
 /*
  * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
@@ -428,7 +433,7 @@ void alternatives_smp_switch(int smp)
 	 * If this still occurs then you should see a hang
 	 * or crash shortly after this line:
 	 */
-	printk("lockdep: fixing up alternatives.\n");
+	pr_info("lockdep: fixing up alternatives\n");
 #endif
 
 	if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
@@ -444,14 +449,14 @@ void alternatives_smp_switch(int smp)
 	if (smp == smp_mode) {
 		/* nothing */
 	} else if (smp) {
-		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
+		pr_info("switching to SMP code\n");
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_lock(mod->locks, mod->locks_end,
 					      mod->text, mod->text_end);
 	} else {
-		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+		pr_info("switching to UP code\n");
 		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
@@ -546,7 +551,7 @@ void __init alternative_instructions(void)
 #ifdef CONFIG_SMP
 	if (smp_alt_once) {
 		if (1 == num_possible_cpus()) {
-			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+			pr_info("switching to UP code\n");
 			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index be16854591cc..f29f6dd6bc08 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -2,6 +2,9 @@
  * Shared support code for AMD K8 northbridges and derivates.
  * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -258,7 +261,7 @@ void amd_flush_garts(void)
 	}
 	spin_unlock_irqrestore(&gart_lock, flags);
 	if (!flushed)
-		printk("nothing to flush?\n");
+		pr_notice("nothing to flush?\n");
 }
 EXPORT_SYMBOL_GPL(amd_flush_garts);
 
@@ -269,11 +272,10 @@ static __init int init_amd_nbs(void)
 	err = amd_cache_northbridges();
 
 	if (err < 0)
-		printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
+		pr_notice("Cannot enumerate AMD northbridges\n");
 
 	if (amd_cache_gart() < 0)
-		printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
-		       "GART support disabled.\n");
+		pr_notice("Cannot initialize GART flush words, GART support disabled\n");
 
 	return err;
 }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ac96561d1a99..5155d6f806f5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -448,8 +448,8 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi
 
 	entry = alloc_irq_pin_list(node);
 	if (!entry) {
-		printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
-				node, apic, pin);
+		pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
+		       node, apic, pin);
 		return -ENOMEM;
 	}
 	entry->apic = apic;
@@ -661,7 +661,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 	ioapic_mask_entry(apic, pin);
 	entry = ioapic_read_entry(apic, pin);
 	if (entry.irr)
-		printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n",
+		pr_err("Unable to reset IRR for apic: %d, pin :%d\n",
 		       mpc_ioapic_id(apic), pin);
 }
 
@@ -895,7 +895,7 @@ static int irq_polarity(int idx)
 		}
 		case 2: /* reserved */
 		{
-			printk(KERN_WARNING "broken BIOS!!\n");
+			pr_warn("broken BIOS!!\n");
 			polarity = 1;
 			break;
 		}
@@ -906,7 +906,7 @@ static int irq_polarity(int idx)
 		}
 		default: /* invalid */
 		{
-			printk(KERN_WARNING "broken BIOS!!\n");
+			pr_warn("broken BIOS!!\n");
 			polarity = 1;
 			break;
 		}
@@ -948,7 +948,7 @@ static int irq_trigger(int idx)
 				}
 				default:
 				{
-					printk(KERN_WARNING "broken BIOS!!\n");
+					pr_warn("broken BIOS!!\n");
 					trigger = 1;
 					break;
 				}
@@ -962,7 +962,7 @@ static int irq_trigger(int idx)
 		}
 		case 2: /* reserved */
 		{
-			printk(KERN_WARNING "broken BIOS!!\n");
+			pr_warn("broken BIOS!!\n");
 			trigger = 1;
 			break;
 		}
@@ -973,7 +973,7 @@ static int irq_trigger(int idx)
 		}
 		default: /* invalid */
 		{
-			printk(KERN_WARNING "broken BIOS!!\n");
+			pr_warn("broken BIOS!!\n");
 			trigger = 0;
 			break;
 		}
@@ -991,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 	 * Debugging check, we are in big trouble if this message pops up!
 	 */
 	if (mp_irqs[idx].dstirq != pin)
-		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+		pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
 
 	if (test_bit(bus, mp_bus_not_pci)) {
 		irq = mp_irqs[idx].srcbusirq;
@@ -1521,7 +1521,6 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 		reg_03.raw = io_apic_read(ioapic_idx, 3);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
-	printk("\n");
 	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
@@ -1578,7 +1577,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 				i,
 				ir_entry->index
 			);
-			printk("%1d   %1d    %1d    %1d   %1d   "
+			pr_cont("%1d   %1d    %1d    %1d   %1d   "
 				"%1d    %1d     %X    %02X\n",
 				ir_entry->format,
 				ir_entry->mask,
@@ -1598,7 +1597,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 				i,
 				entry.dest
 			);
-			printk("%1d    %1d    %1d   %1d   %1d    "
+			pr_cont("%1d    %1d    %1d   %1d   %1d    "
 				"%1d    %1d    %02X\n",
 				entry.mask,
 				entry.trigger,
@@ -1651,8 +1650,8 @@ __apicdebuginit(void) print_IO_APICs(void)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
 		for_each_irq_pin(entry, cfg->irq_2_pin)
-			printk("-> %d:%d", entry->apic, entry->pin);
-		printk("\n");
+			pr_cont("-> %d:%d", entry->apic, entry->pin);
+		pr_cont("\n");
 	}
 
 	printk(KERN_INFO ".................................... done.\n");
@@ -1665,9 +1664,9 @@ __apicdebuginit(void) print_APIC_field(int base)
 	printk(KERN_DEBUG);
 
 	for (i = 0; i < 8; i++)
-		printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+		pr_cont("%08x", apic_read(base + i*0x10));
 
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 }
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
@@ -1769,7 +1768,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
 		}
 	}
-	printk("\n");
+	pr_cont("\n");
 }
 
 __apicdebuginit(void) print_local_APICs(int maxcpu)
@@ -2065,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 		reg_00.raw = io_apic_read(ioapic_idx, 0);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
-			printk("could not set ID!\n");
+			pr_cont("could not set ID!\n");
 		else
 			apic_printk(APIC_VERBOSE, " ok.\n");
 	}
@@ -3563,7 +3562,8 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id)
 
 		/* Sanity check */
 		if (reg_00.bits.ID != apic_id) {
-			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+			pr_err("IOAPIC[%d]: Unable to change apic_id!\n",
+			       ioapic);
 			return -1;
 		}
 	}
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 659897c00755..e97d542ccdd0 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -26,6 +26,8 @@
  *
  */
 
+#define pr_fmt(fmt) "summit: %s: " fmt, __func__
+
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <asm/io.h>
@@ -235,8 +237,8 @@ static int summit_apic_id_registered(void)
 
 static void summit_setup_apic_routing(void)
 {
-	printk("Enabling APIC mode:  Summit.  Using %d I/O APICs\n",
-						nr_ioapics);
+	pr_info("Enabling APIC mode:  Summit.  Using %d I/O APICs\n",
+		nr_ioapics);
 }
 
 static int summit_cpu_present_to_apicid(int mps_cpu)
@@ -275,7 +277,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
 		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
-			printk("%s: Not a valid mask!\n", __func__);
+			pr_err("Not a valid mask!\n");
 			return BAD_APICID;
 		}
 		apicid |= new_apicid;
@@ -355,7 +357,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
 		}
 	}
 	if (i == rio_table_hdr->num_rio_dev) {
-		printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
+		pr_err("Couldn't find owner Cyclone for Winnipeg!\n");
 		return last_bus;
 	}
 
@@ -366,7 +368,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
 		}
 	}
 	if (i == rio_table_hdr->num_scal_dev) {
-		printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
+		pr_err("Couldn't find owner Twister for Cyclone!\n");
 		return last_bus;
 	}
 
@@ -396,7 +398,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
 		num_buses = 9;
 		break;
 	default:
-		printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
+		pr_info("Unsupported Winnipeg type!\n");
 		return last_bus;
 	}
 
@@ -411,13 +413,15 @@ static int build_detail_arrays(void)
 	int i, scal_detail_size, rio_detail_size;
 
 	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
-		printk(KERN_WARNING "%s: MAX_NUMNODES too low!  Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+		pr_warn("MAX_NUMNODES too low!  Defined as %d, but system has %d nodes\n",
+			MAX_NUMNODES, rio_table_hdr->num_scal_dev);
 		return 0;
 	}
 
 	switch (rio_table_hdr->version) {
 	default:
-		printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
+		pr_warn("Invalid Rio Grande Table Version: %d\n",
+			rio_table_hdr->version);
 		return 0;
 	case 2:
 		scal_detail_size = 11;
@@ -462,7 +466,7 @@ void setup_summit(void)
 		offset = *((unsigned short *)(ptr + offset));
 	}
 	if (!rio_table_hdr) {
-		printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
+		pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n");
 		return;
 	}
 
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 07b0c0db466c..d65464e43503 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -201,6 +201,8 @@
  *    http://www.microsoft.com/whdc/archive/amp_12.mspx]
  */
 
+#define pr_fmt(fmt) "apm: " fmt
+
 #include <linux/module.h>
 
 #include <linux/poll.h>
@@ -485,11 +487,11 @@ static void apm_error(char *str, int err)
 		if (error_table[i].key == err)
 			break;
 	if (i < ERROR_COUNT)
-		printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
+		pr_notice("%s: %s\n", str, error_table[i].msg);
 	else if (err < 0)
-		printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err);
+		pr_notice("%s: linux error code %i\n", str, err);
 	else
-		printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
+		pr_notice("%s: unknown error code %#2.2x\n",
 		       str, err);
 }
 
@@ -1184,7 +1186,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender)
 			static int notified;
 
 			if (notified++ == 0)
-			    printk(KERN_ERR "apm: an event queue overflowed\n");
+				pr_err("an event queue overflowed\n");
 			if (++as->event_tail >= APM_MAX_EVENTS)
 				as->event_tail = 0;
 		}
@@ -1447,7 +1449,7 @@ static void apm_mainloop(void)
 static int check_apm_user(struct apm_user *as, const char *func)
 {
 	if (as == NULL || as->magic != APM_BIOS_MAGIC) {
-		printk(KERN_ERR "apm: %s passed bad filp\n", func);
+		pr_err("%s passed bad filp\n", func);
 		return 1;
 	}
 	return 0;
@@ -1586,7 +1588,7 @@ static int do_release(struct inode *inode, struct file *filp)
 		     as1 = as1->next)
 			;
 		if (as1 == NULL)
-			printk(KERN_ERR "apm: filp not in user list\n");
+			pr_err("filp not in user list\n");
 		else
 			as1->next = as->next;
 	}
@@ -1600,11 +1602,9 @@ static int do_open(struct inode *inode, struct file *filp)
 	struct apm_user *as;
 
 	as = kmalloc(sizeof(*as), GFP_KERNEL);
-	if (as == NULL) {
-		printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
-		       sizeof(*as));
+	if (as == NULL)
 		return -ENOMEM;
-	}
+
 	as->magic = APM_BIOS_MAGIC;
 	as->event_tail = as->event_head = 0;
 	as->suspends_pending = as->standbys_pending = 0;
@@ -2313,16 +2313,16 @@ static int __init apm_init(void)
 	}
 
 	if (apm_info.disabled) {
-		printk(KERN_NOTICE "apm: disabled on user request.\n");
+		pr_notice("disabled on user request.\n");
 		return -ENODEV;
 	}
 	if ((num_online_cpus() > 1) && !power_off && !smp) {
-		printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
+		pr_notice("disabled - APM is not SMP safe.\n");
 		apm_info.disabled = 1;
 		return -ENODEV;
 	}
 	if (!acpi_disabled) {
-		printk(KERN_NOTICE "apm: overridden by ACPI.\n");
+		pr_notice("overridden by ACPI.\n");
 		apm_info.disabled = 1;
 		return -ENODEV;
 	}
@@ -2356,8 +2356,7 @@ static int __init apm_init(void)
 
 	kapmd_task = kthread_create(apm, NULL, "kapmd");
 	if (IS_ERR(kapmd_task)) {
-		printk(KERN_ERR "apm: disabled - Unable to start kernel "
-				"thread.\n");
+		pr_err("disabled - Unable to start kernel thread\n");
 		err = PTR_ERR(kapmd_task);
 		kapmd_task = NULL;
 		remove_proc_entry("apm", NULL);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 46674fbb62ba..c97bb7b5a9f8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -55,8 +55,8 @@ static void __init check_fpu(void)
 
 	if (!boot_cpu_data.hard_math) {
 #ifndef CONFIG_MATH_EMULATION
-		printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
-		printk(KERN_EMERG "Giving up.\n");
+		pr_emerg("No coprocessor found and no math emulation present\n");
+		pr_emerg("Giving up\n");
 		for (;;) ;
 #endif
 		return;
@@ -86,7 +86,7 @@ static void __init check_fpu(void)
 
 	boot_cpu_data.fdiv_bug = fdiv_bug;
 	if (boot_cpu_data.fdiv_bug)
-		printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n");
+		pr_warn("Hmm, FPU with FDIV bug\n");
 }
 
 static void __init check_hlt(void)
@@ -94,16 +94,16 @@ static void __init check_hlt(void)
 	if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
 		return;
 
-	printk(KERN_INFO "Checking 'hlt' instruction... ");
+	pr_info("Checking 'hlt' instruction... ");
 	if (!boot_cpu_data.hlt_works_ok) {
-		printk("disabled\n");
+		pr_cont("disabled\n");
 		return;
 	}
 	halt();
 	halt();
 	halt();
 	halt();
-	printk(KERN_CONT "OK.\n");
+	pr_cont("OK\n");
 }
 
 /*
@@ -116,7 +116,7 @@ static void __init check_popad(void)
 #ifndef CONFIG_X86_POPAD_OK
 	int res, inp = (int) &res;
 
-	printk(KERN_INFO "Checking for popad bug... ");
+	pr_info("Checking for popad bug... ");
 	__asm__ __volatile__(
 	  "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
 	  : "=&a" (res)
@@ -127,9 +127,9 @@ static void __init check_popad(void)
 	 * CPU hard. Too bad.
 	 */
 	if (res != 12345678)
-		printk(KERN_CONT "Buggy.\n");
+		pr_cont("Buggy\n");
 	else
-		printk(KERN_CONT "OK.\n");
+		pr_cont("OK\n");
 #endif
 }
 
@@ -161,7 +161,7 @@ void __init check_bugs(void)
 {
 	identify_boot_cpu();
 #ifndef CONFIG_SMP
-	printk(KERN_INFO "CPU: ");
+	pr_info("CPU: ");
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 0a687fd185e6..5623b4b5d511 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -7,6 +7,9 @@
  * Copyright 2008 Intel Corporation
  * Author: Andi Kleen
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/thread_info.h>
 #include <linux/capability.h>
 #include <linux/miscdevice.h>
@@ -210,7 +213,7 @@ static void drain_mcelog_buffer(void)
 				cpu_relax();
 
 				if (!m->finished && retries >= 4) {
-					pr_err("MCE: skipping error being logged currently!\n");
+					pr_err("skipping error being logged currently!\n");
 					break;
 				}
 			}
@@ -1167,8 +1170,9 @@ int memory_failure(unsigned long pfn, int vector, int flags)
 {
 	/* mce_severity() should not hand us an ACTION_REQUIRED error */
 	BUG_ON(flags & MF_ACTION_REQUIRED);
-	printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
-		"Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
+	pr_err("Uncorrected memory error in page 0x%lx ignored\n"
+	       "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n",
+	       pfn);
 
 	return 0;
 }
@@ -1358,11 +1362,10 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
 
 	b = cap & MCG_BANKCNT_MASK;
 	if (!banks)
-		printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
+		pr_info("CPU supports %d MCE banks\n", b);
 
 	if (b > MAX_NR_BANKS) {
-		printk(KERN_WARNING
-		       "MCE: Using only %u machine check banks out of %u\n",
+		pr_warn("Using only %u machine check banks out of %u\n",
 			MAX_NR_BANKS, b);
 		b = MAX_NR_BANKS;
 	}
@@ -1419,7 +1422,7 @@ static void __mcheck_cpu_init_generic(void)
 static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 {
 	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
-		pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
+		pr_info("unknown CPU type - not enabling MCE support\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -1574,7 +1577,7 @@ static void __mcheck_cpu_init_timer(void)
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
-	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+	pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
 	       smp_processor_id());
 }
 
@@ -1893,8 +1896,7 @@ static int __init mcheck_enable(char *str)
 			get_option(&str, &monarch_timeout);
 		}
 	} else {
-		printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
-		       str);
+		pr_info("mce argument %s ignored. Please use /sys\n", str);
 		return 0;
 	}
 	return 1;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..9e3f5d6e3d20 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -5,6 +5,8 @@
  * among events on a single PMU.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/init.h>
@@ -1000,7 +1002,7 @@ static void intel_pmu_reset(void)
 
 	local_irq_save(flags);
 
-	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
@@ -1638,14 +1640,14 @@ static __init void intel_clovertown_quirk(void)
 	 * But taken together it might just make sense to not enable PEBS on
 	 * these chips.
 	 */
-	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
+	pr_warn("PEBS disabled due to CPU errata\n");
 	x86_pmu.pebs = 0;
 	x86_pmu.pebs_constraints = NULL;
 }
 
 static __init void intel_sandybridge_quirk(void)
 {
-	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
+	pr_warn("PEBS disabled due to CPU errata\n");
 	x86_pmu.pebs = 0;
 	x86_pmu.pebs_constraints = NULL;
 }
@@ -1667,8 +1669,8 @@ static __init void intel_arch_events_quirk(void)
 	/* disable event that reported as not presend by cpuid */
 	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
 		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
-		printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
-				intel_arch_events_map[bit].name);
+		pr_warn("CPUID marked event: \'%s\' unavailable\n",
+			intel_arch_events_map[bit].name);
 	}
 }
 
@@ -1687,7 +1689,7 @@ static __init void intel_nehalem_quirk(void)
 		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
 		ebx.split.no_branch_misses_retired = 0;
 		x86_pmu.events_maskl = ebx.full;
-		printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
+		pr_info("CPU erratum AAJ80 worked around\n");
 	}
 }
 
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 571246d81edf..87d3b5d663cd 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -27,8 +27,8 @@ static int die_counter;
 
 void printk_address(unsigned long address, int reliable)
 {
-	printk(" [<%p>] %s%pB\n", (void *) address,
-			reliable ? "" : "? ", (void *) address);
+	pr_cont(" [<%p>] %s%pB\n",
+		(void *)address, reliable ? "" : "? ", (void *)address);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index e0b1d783daab..3a8aced11ae9 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -73,11 +73,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		if (kstack_end(stack))
 			break;
 		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			printk(KERN_CONT "\n");
-		printk(KERN_CONT " %08lx", *stack++);
+			pr_cont("\n");
+		pr_cont(" %08lx", *stack++);
 		touch_nmi_watchdog();
 	}
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
@@ -89,9 +89,9 @@ void show_regs(struct pt_regs *regs)
 	print_modules();
 	__show_regs(regs, !user_mode_vm(regs));
 
-	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
-		TASK_COMM_LEN, current->comm, task_pid_nr(current),
-		current_thread_info(), current, task_thread_info(current));
+	pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
+		 TASK_COMM_LEN, current->comm, task_pid_nr(current),
+		 current_thread_info(), current, task_thread_info(current));
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
@@ -102,10 +102,10 @@ void show_regs(struct pt_regs *regs)
 		unsigned char c;
 		u8 *ip;
 
-		printk(KERN_EMERG "Stack:\n");
+		pr_emerg("Stack:\n");
 		show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
 
-		printk(KERN_EMERG "Code: ");
+		pr_emerg("Code:");
 
 		ip = (u8 *)regs->ip - code_prologue;
 		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
@@ -116,16 +116,16 @@ void show_regs(struct pt_regs *regs)
 		for (i = 0; i < code_len; i++, ip++) {
 			if (ip < (u8 *)PAGE_OFFSET ||
 					probe_kernel_address(ip, c)) {
-				printk(KERN_CONT " Bad EIP value.");
+				pr_cont("  Bad EIP value.");
 				break;
 			}
 			if (ip == (u8 *)regs->ip)
-				printk(KERN_CONT "<%02x> ", c);
+				pr_cont(" <%02x>", c);
 			else
-				printk(KERN_CONT "%02x ", c);
+				pr_cont(" %02x", c);
 		}
 	}
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 }
 
 int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 791b76122aa8..c582e9c5bd1a 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -228,20 +228,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		if (stack >= irq_stack && stack <= irq_stack_end) {
 			if (stack == irq_stack_end) {
 				stack = (unsigned long *) (irq_stack_end[-1]);
-				printk(KERN_CONT " <EOI> ");
+				pr_cont(" <EOI> ");
 			}
 		} else {
 		if (((long) stack & (THREAD_SIZE-1)) == 0)
 			break;
 		}
 		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			printk(KERN_CONT "\n");
-		printk(KERN_CONT " %016lx", *stack++);
+			pr_cont("\n");
+		pr_cont(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
 	preempt_enable();
 
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
@@ -256,8 +256,8 @@ void show_regs(struct pt_regs *regs)
 	printk("CPU %d ", cpu);
 	print_modules();
 	__show_regs(regs, 1);
-	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
-		cur->comm, cur->pid, task_thread_info(cur), cur);
+	printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n",
+	       cur->comm, cur->pid, task_thread_info(cur), cur);
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
@@ -284,16 +284,16 @@ void show_regs(struct pt_regs *regs)
 		for (i = 0; i < code_len; i++, ip++) {
 			if (ip < (u8 *)PAGE_OFFSET ||
 					probe_kernel_address(ip, c)) {
-				printk(KERN_CONT " Bad RIP value.");
+				pr_cont(" Bad RIP value.");
 				break;
 			}
 			if (ip == (u8 *)regs->ip)
-				printk(KERN_CONT "<%02x> ", c);
+				pr_cont("<%02x> ", c);
 			else
-				printk(KERN_CONT "%02x ", c);
+				pr_cont("%02x ", c);
 		}
 	}
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 }
 
 int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3dafc6003b7c..1f5f1d5d2a02 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -294,9 +294,9 @@ void fixup_irqs(void)
 		raw_spin_unlock(&desc->lock);
 
 		if (break_affinity && set_affinity)
-			printk("Broke affinity for irq %i\n", irq);
+			pr_notice("Broke affinity for irq %i\n", irq);
 		else if (!set_affinity)
-			printk("Cannot set affinity for irq %i\n", irq);
+			pr_notice("Cannot set affinity for irq %i\n", irq);
 	}
 
 	/*
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index f21fd94ac897..202494d2ec6e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -15,6 +15,9 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/moduleloader.h>
 #include <linux/elf.h>
 #include <linux/vmalloc.h>
@@ -30,9 +33,14 @@
 #include <asm/pgtable.h>
 
 #if 0
-#define DEBUGP printk
+#define DEBUGP(fmt, ...)				\
+	printk(KERN_DEBUG fmt, ##__VA_ARGS__)
 #else
-#define DEBUGP(fmt...)
+#define DEBUGP(fmt, ...)				\
+do {							\
+	if (0)						\
+		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
+} while (0)
 #endif
 
 void *module_alloc(unsigned long size)
@@ -56,8 +64,8 @@ int apply_relocate(Elf32_Shdr *sechdrs,
 	Elf32_Sym *sym;
 	uint32_t *location;
 
-	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
+	DEBUGP("Applying relocate section %u to %u\n",
+	       relsec, sechdrs[relsec].sh_info);
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
 		/* This is where to make the change */
 		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -77,7 +85,7 @@ int apply_relocate(Elf32_Shdr *sechdrs,
 			*location += sym->st_value - (uint32_t)location;
 			break;
 		default:
-			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			pr_err("%s: Unknown relocation: %u\n",
 			       me->name, ELF32_R_TYPE(rel[i].r_info));
 			return -ENOEXEC;
 		}
@@ -97,8 +105,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	void *loc;
 	u64 val;
 
-	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
+	DEBUGP("Applying relocate section %u to %u\n",
+	       relsec, sechdrs[relsec].sh_info);
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
 		/* This is where to make the change */
 		loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -110,8 +118,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			+ ELF64_R_SYM(rel[i].r_info);
 
 		DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
-			(int)ELF64_R_TYPE(rel[i].r_info),
-			sym->st_value, rel[i].r_addend, (u64)loc);
+		       (int)ELF64_R_TYPE(rel[i].r_info),
+		       sym->st_value, rel[i].r_addend, (u64)loc);
 
 		val = sym->st_value + rel[i].r_addend;
 
@@ -140,7 +148,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 #endif
 			break;
 		default:
-			printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
+			pr_err("%s: Unknown rela relocation: %llu\n",
 			       me->name, ELF64_R_TYPE(rel[i].r_info));
 			return -ENOEXEC;
 		}
@@ -148,9 +156,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	return 0;
 
 overflow:
-	printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
+	pr_err("overflow in relocation type %d val %Lx\n",
 	       (int)ELF64_R_TYPE(rel[i].r_info), val);
-	printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
+	pr_err("`%s' likely not compiled with -mcmodel=kernel\n",
 	       me->name);
 	return -ENOEXEC;
 }
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index b72838bae64a..299d49302e7d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -22,6 +22,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#define pr_fmt(fmt) "Calgary: " fmt
+
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -245,7 +247,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
 		offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,
 					  npages, 0, boundary_size, 0);
 		if (offset == ~0UL) {
-			printk(KERN_WARNING "Calgary: IOMMU full.\n");
+			pr_warn("IOMMU full\n");
 			spin_unlock_irqrestore(&tbl->it_lock, flags);
 			if (panic_on_overflow)
 				panic("Calgary: fix the allocator.\n");
@@ -271,8 +273,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
 	entry = iommu_range_alloc(dev, tbl, npages);
 
 	if (unlikely(entry == DMA_ERROR_CODE)) {
-		printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
-		       "iommu %p\n", npages, tbl);
+		pr_warn("failed to allocate %u pages in iommu %p\n",
+			npages, tbl);
 		return DMA_ERROR_CODE;
 	}
 
@@ -561,8 +563,7 @@ static void calgary_tce_cache_blast(struct iommu_table *tbl)
 		i++;
 	} while ((val & 0xff) != 0xff && i < 100);
 	if (i == 100)
-		printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
-		       "continuing anyway\n");
+		pr_warn("PCI bus not quiesced, continuing anyway\n");
 
 	/* invalidate TCE cache */
 	target = calgary_reg(bbar, tar_offset(tbl->it_busno));
@@ -604,8 +605,7 @@ begin:
 		i++;
 	} while ((val64 & 0xff) != 0xff && i < 100);
 	if (i == 100)
-		printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, "
-		       "continuing anyway\n");
+		pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n");
 
 	/* 3. poll Page Migration DEBUG for SoftStopFault */
 	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
@@ -617,8 +617,7 @@ begin:
 		if (++count < 100)
 			goto begin;
 		else {
-			printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, "
-			       "aborting TCE cache flush sequence!\n");
+			pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n");
 			return; /* pray for the best */
 		}
 	}
@@ -840,8 +839,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl)
 	plssr = be32_to_cpu(readl(target));
 
 	/* If no error, the agent ID in the CSR is not valid */
-	printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, "
-	       "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr);
+	pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n",
+		 tbl->it_busno, csr, plssr);
 }
 
 static void calioc2_dump_error_regs(struct iommu_table *tbl)
@@ -867,22 +866,21 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl)
 	target = calgary_reg(bbar, phboff | 0x800);
 	mck = be32_to_cpu(readl(target));
 
-	printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n",
-	       tbl->it_busno);
+	pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno);
 
-	printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
-	       csr, plssr, csmr, mck);
+	pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
+		 csr, plssr, csmr, mck);
 
 	/* dump rest of error regs */
-	printk(KERN_EMERG "Calgary: ");
+	pr_emerg("");
 	for (i = 0; i < ARRAY_SIZE(errregs); i++) {
 		/* err regs are at 0x810 - 0x870 */
 		erroff = (0x810 + (i * 0x10));
 		target = calgary_reg(bbar, phboff | erroff);
 		errregs[i] = be32_to_cpu(readl(target));
-		printk("0x%08x@0x%lx ", errregs[i], erroff);
+		pr_cont("0x%08x@0x%lx ", errregs[i], erroff);
 	}
-	printk("\n");
+	pr_cont("\n");
 
 	/* root complex status */
 	target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 735279e54e59..ef6a8456f719 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -145,16 +147,14 @@ void show_regs_common(void)
 	/* Board Name is optional */
 	board = dmi_get_system_info(DMI_BOARD_NAME);
 
-	printk(KERN_CONT "\n");
-	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s",
-		current->pid, current->comm, print_tainted(),
-		init_utsname()->release,
-		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version);
-	printk(KERN_CONT " %s %s", vendor, product);
-	if (board)
-		printk(KERN_CONT "/%s", board);
-	printk(KERN_CONT "\n");
+	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n",
+	       current->pid, current->comm, print_tainted(),
+	       init_utsname()->release,
+	       (int)strcspn(init_utsname()->version, " "),
+	       init_utsname()->version,
+	       vendor, product,
+	       board ? "/" : "",
+	       board ? board : "");
 }
 
 void flush_thread(void)
@@ -645,7 +645,7 @@ static void amd_e400_idle(void)
 			amd_e400_c1e_detected = true;
 			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 				mark_tsc_unstable("TSC halt in AMD C1E");
-			printk(KERN_INFO "System has AMD C1E enabled\n");
+			pr_info("System has AMD C1E enabled\n");
 		}
 	}
 
@@ -659,8 +659,7 @@ static void amd_e400_idle(void)
 			 */
 			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
 					   &cpu);
-			printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
-			       cpu);
+			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
 		}
 		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 
@@ -681,8 +680,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	if (pm_idle == poll_idle && smp_num_siblings > 1) {
-		printk_once(KERN_WARNING "WARNING: polling idle and HT enabled,"
-			" performance may degrade.\n");
+		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
 	}
 #endif
 	if (pm_idle)
@@ -692,11 +690,11 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 		/*
 		 * One CPU supports mwait => All CPUs supports mwait
 		 */
-		printk(KERN_INFO "using mwait in idle threads.\n");
+		pr_info("using mwait in idle threads\n");
 		pm_idle = mwait_idle;
 	} else if (cpu_has_amd_erratum(amd_erratum_400)) {
 		/* E400: APIC timer interrupt does not wake up CPU from C1e */
-		printk(KERN_INFO "using AMD E400 aware idle routine\n");
+		pr_info("using AMD E400 aware idle routine\n");
 		pm_idle = amd_e400_idle;
 	} else
 		pm_idle = default_idle;
@@ -715,7 +713,7 @@ static int __init idle_setup(char *str)
 		return -EINVAL;
 
 	if (!strcmp(str, "poll")) {
-		printk("using polling idle threads.\n");
+		pr_info("using polling idle threads\n");
 		pm_idle = poll_idle;
 		boot_option_idle_override = IDLE_POLL;
 	} else if (!strcmp(str, "mwait")) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 61cdf7fdf099..85151f3e36e2 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,10 +117,10 @@ void release_thread(struct task_struct *dead_task)
 {
 	if (dead_task->mm) {
 		if (dead_task->mm->context.size) {
-			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
-					dead_task->comm,
-					dead_task->mm->context.ldt,
-					dead_task->mm->context.size);
+			pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+				dead_task->comm,
+				dead_task->mm->context.ldt,
+				dead_task->mm->context.size);
 			BUG();
 		}
 	}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 79c45af81604..ab3f06260712 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/reboot.h>
 #include <linux/init.h>
@@ -152,7 +154,8 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
 {
 	if (reboot_type != BOOT_BIOS) {
 		reboot_type = BOOT_BIOS;
-		printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident);
+		pr_info("%s series board detected. Selecting %s-method for reboots.\n",
+			"BIOS", d->ident);
 	}
 	return 0;
 }
@@ -207,8 +210,8 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
 {
 	if (reboot_type != BOOT_CF9) {
 		reboot_type = BOOT_CF9;
-		printk(KERN_INFO "%s series board detected. "
-		       "Selecting PCI-method for reboots.\n", d->ident);
+		pr_info("%s series board detected. Selecting %s-method for reboots.\n",
+			"PCI", d->ident);
 	}
 	return 0;
 }
@@ -217,7 +220,8 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
 {
 	if (reboot_type != BOOT_KBD) {
 		reboot_type = BOOT_KBD;
-		printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident);
+		pr_info("%s series board detected. Selecting %s-method for reboot.\n",
+			"KBD", d->ident);
 	}
 	return 0;
 }
@@ -668,7 +672,7 @@ static void __machine_emergency_restart(int emergency)
 
 static void native_machine_restart(char *__unused)
 {
-	printk("machine restart\n");
+	pr_notice("machine restart\n");
 
 	if (!reboot_force)
 		machine_shutdown();
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 21af737053aa..b280908a376e 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,6 +6,9 @@
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-2002   x86-64 support by Andi Kleen
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -814,7 +817,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 		       me->comm, me->pid, where, frame,
 		       regs->ip, regs->sp, regs->orig_ax);
 		print_vma_addr(" in ", regs->ip);
-		printk(KERN_CONT "\n");
+		pr_cont("\n");
 	}
 
 	force_sig(SIGSEGV, me);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fd019d78b1f4..456d64806c8f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1,4 +1,4 @@
-/*
+ /*
  *	x86 SMP booting functions
  *
  *	(c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
@@ -39,6 +39,8 @@
  *	Glauber Costa		:	i386 and x86_64 integration
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/module.h>
@@ -184,7 +186,7 @@ static void __cpuinit smp_callin(void)
 	 * boards)
 	 */
 
-	pr_debug("CALLIN, before setup_local_APIC().\n");
+	pr_debug("CALLIN, before setup_local_APIC()\n");
 	if (apic->smp_callin_clear_local_apic)
 		apic->smp_callin_clear_local_apic();
 	setup_local_APIC();
@@ -420,17 +422,16 @@ static void impress_friends(void)
 	/*
 	 * Allow the user to impress friends.
 	 */
-	pr_debug("Before bogomips.\n");
+	pr_debug("Before bogomips\n");
 	for_each_possible_cpu(cpu)
 		if (cpumask_test_cpu(cpu, cpu_callout_mask))
 			bogosum += cpu_data(cpu).loops_per_jiffy;
-	printk(KERN_INFO
-		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+	pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
 		num_online_cpus(),
 		bogosum/(500000/HZ),
 		(bogosum/(5000/HZ))%100);
 
-	pr_debug("Before bogocount - setting activated=1.\n");
+	pr_debug("Before bogocount - setting activated=1\n");
 }
 
 void __inquire_remote_apic(int apicid)
@@ -440,18 +441,17 @@ void __inquire_remote_apic(int apicid)
 	int timeout;
 	u32 status;
 
-	printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
+	pr_info("Inquiring remote APIC 0x%x...\n", apicid);
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
-		printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
+		pr_info("... APIC 0x%x %s: ", apicid, names[i]);
 
 		/*
 		 * Wait for idle.
 		 */
 		status = safe_apic_wait_icr_idle();
 		if (status)
-			printk(KERN_CONT
-			       "a previous APIC delivery may have failed\n");
+			pr_cont("a previous APIC delivery may have failed\n");
 
 		apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
 
@@ -464,10 +464,10 @@ void __inquire_remote_apic(int apicid)
 		switch (status) {
 		case APIC_ICR_RR_VALID:
 			status = apic_read(APIC_RRR);
-			printk(KERN_CONT "%08x\n", status);
+			pr_cont("%08x\n", status);
 			break;
 		default:
-			printk(KERN_CONT "failed\n");
+			pr_cont("failed\n");
 		}
 	}
 }
@@ -501,12 +501,12 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 			apic_write(APIC_ESR, 0);
 		accept_status = (apic_read(APIC_ESR) & 0xEF);
 	}
-	pr_debug("NMI sent.\n");
+	pr_debug("NMI sent\n");
 
 	if (send_status)
-		printk(KERN_ERR "APIC never delivered???\n");
+		pr_err("APIC never delivered???\n");
 	if (accept_status)
-		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
+		pr_err("APIC delivery error (%lx)\n", accept_status);
 
 	return (send_status | accept_status);
 }
@@ -528,7 +528,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 		apic_read(APIC_ESR);
 	}
 
-	pr_debug("Asserting INIT.\n");
+	pr_debug("Asserting INIT\n");
 
 	/*
 	 * Turn INIT on target chip
@@ -544,7 +544,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 
 	mdelay(10);
 
-	pr_debug("Deasserting INIT.\n");
+	pr_debug("Deasserting INIT\n");
 
 	/* Target chip */
 	/* Send IPI */
@@ -577,14 +577,14 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	/*
 	 * Run STARTUP IPI loop.
 	 */
-	pr_debug("#startup loops: %d.\n", num_starts);
+	pr_debug("#startup loops: %d\n", num_starts);
 
 	for (j = 1; j <= num_starts; j++) {
-		pr_debug("Sending STARTUP #%d.\n", j);
+		pr_debug("Sending STARTUP #%d\n", j);
 		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
 			apic_write(APIC_ESR, 0);
 		apic_read(APIC_ESR);
-		pr_debug("After apic_write.\n");
+		pr_debug("After apic_write\n");
 
 		/*
 		 * STARTUP IPI
@@ -601,7 +601,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 		 */
 		udelay(300);
 
-		pr_debug("Startup point 1.\n");
+		pr_debug("Startup point 1\n");
 
 		pr_debug("Waiting for send to finish...\n");
 		send_status = safe_apic_wait_icr_idle();
@@ -616,12 +616,12 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 		if (send_status || accept_status)
 			break;
 	}
-	pr_debug("After Startup.\n");
+	pr_debug("After Startup\n");
 
 	if (send_status)
-		printk(KERN_ERR "APIC never delivered???\n");
+		pr_err("APIC never delivered???\n");
 	if (accept_status)
-		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
+		pr_err("APIC delivery error (%lx)\n", accept_status);
 
 	return (send_status | accept_status);
 }
@@ -635,11 +635,11 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
 	if (system_state == SYSTEM_BOOTING) {
 		if (node != current_node) {
 			if (current_node > (-1))
-				pr_cont(" Ok.\n");
+				pr_cont(" OK\n");
 			current_node = node;
 			pr_info("Booting Node %3d, Processors ", node);
 		}
-		pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : "");
+		pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");
 		return;
 	} else
 		pr_info("Booting Node %d Processor %d APIC 0x%x\n",
@@ -719,9 +719,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		/*
 		 * allow APs to start initializing.
 		 */
-		pr_debug("Before Callout %d.\n", cpu);
+		pr_debug("Before Callout %d\n", cpu);
 		cpumask_set_cpu(cpu, cpu_callout_mask);
-		pr_debug("After Callout %d.\n", cpu);
+		pr_debug("After Callout %d\n", cpu);
 
 		/*
 		 * Wait 5s total for a response
@@ -749,7 +749,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 				pr_err("CPU%d: Stuck ??\n", cpu);
 			else
 				/* trampoline code not run */
-				pr_err("CPU%d: Not responding.\n", cpu);
+				pr_err("CPU%d: Not responding\n", cpu);
 			if (apic->inquire_remote_apic)
 				apic->inquire_remote_apic(apicid);
 		}
@@ -794,7 +794,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 	if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
 	    !physid_isset(apicid, phys_cpu_present_map) ||
 	    !apic->apic_id_valid(apicid)) {
-		printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
+		pr_err("%s: bad cpu %d\n", __func__, cpu);
 		return -EINVAL;
 	}
 
@@ -875,9 +875,8 @@ static int __init smp_sanity_check(unsigned max_cpus)
 		unsigned int cpu;
 		unsigned nr;
 
-		printk(KERN_WARNING
-		       "More than 8 CPUs detected - skipping them.\n"
-		       "Use CONFIG_X86_BIGSMP.\n");
+		pr_warn("More than 8 CPUs detected - skipping them\n"
+			"Use CONFIG_X86_BIGSMP\n");
 
 		nr = 0;
 		for_each_present_cpu(cpu) {
@@ -898,8 +897,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
 #endif
 
 	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
-		printk(KERN_WARNING
-			"weird, boot CPU (#%d) not listed by the BIOS.\n",
+		pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",
 			hard_smp_processor_id());
 
 		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
@@ -911,11 +909,10 @@ static int __init smp_sanity_check(unsigned max_cpus)
 	 */
 	if (!smp_found_config && !acpi_lapic) {
 		preempt_enable();
-		printk(KERN_NOTICE "SMP motherboard not detected.\n");
+		pr_notice("SMP motherboard not detected\n");
 		disable_smp();
 		if (APIC_init_uniprocessor())
-			printk(KERN_NOTICE "Local APIC not detected."
-					   " Using dummy APIC emulation.\n");
+			pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
 		return -1;
 	}
 
@@ -924,9 +921,8 @@ static int __init smp_sanity_check(unsigned max_cpus)
 	 * CPU too, but we do it for the sake of robustness anyway.
 	 */
 	if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
-		printk(KERN_NOTICE
-			"weird, boot CPU (#%d) not listed by the BIOS.\n",
-			boot_cpu_physical_apicid);
+		pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n",
+			  boot_cpu_physical_apicid);
 		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
 	}
 	preempt_enable();
@@ -939,8 +935,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
 		if (!disable_apic) {
 			pr_err("BIOS bug, local APIC #%d not detected!...\n",
 				boot_cpu_physical_apicid);
-			pr_err("... forcing use of dummy APIC emulation."
-				"(tell your hw vendor)\n");
+			pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");
 		}
 		smpboot_clear_io_apic();
 		disable_ioapic_support();
@@ -953,7 +948,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
 	 * If SMP should be disabled, then really disable it!
 	 */
 	if (!max_cpus) {
-		printk(KERN_INFO "SMP mode deactivated.\n");
+		pr_info("SMP mode deactivated\n");
 		smpboot_clear_io_apic();
 
 		connect_bsp_APIC();
@@ -1005,7 +1000,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 
 	if (smp_sanity_check(max_cpus) < 0) {
-		printk(KERN_INFO "SMP disabled\n");
+		pr_info("SMP disabled\n");
 		disable_smp();
 		goto out;
 	}
@@ -1043,7 +1038,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	 * Set up local APIC timer on boot CPU.
 	 */
 
-	printk(KERN_INFO "CPU%d: ", 0);
+	pr_info("CPU%d: ", 0);
 	print_cpu_info(&cpu_data(0));
 	x86_init.timers.setup_percpu_clockev();
 
@@ -1093,7 +1088,7 @@ void __init native_smp_prepare_boot_cpu(void)
 
 void __init native_smp_cpus_done(unsigned int max_cpus)
 {
-	pr_debug("Boot done.\n");
+	pr_debug("Boot done\n");
 
 	nmi_selftest();
 	impress_friends();
@@ -1154,8 +1149,7 @@ __init void prefill_possible_map(void)
 
 	/* nr_cpu_ids could be reduced via nr_cpus= */
 	if (possible > nr_cpu_ids) {
-		printk(KERN_WARNING
-			"%d Processors exceeds NR_CPUS limit of %d\n",
+		pr_warn("%d Processors exceeds NR_CPUS limit of %d\n",
 			possible, nr_cpu_ids);
 		possible = nr_cpu_ids;
 	}
@@ -1164,13 +1158,12 @@ __init void prefill_possible_map(void)
 	if (!setup_max_cpus)
 #endif
 	if (possible > i) {
-		printk(KERN_WARNING
-			"%d Processors exceeds max_cpus limit of %u\n",
+		pr_warn("%d Processors exceeds max_cpus limit of %u\n",
 			possible, setup_max_cpus);
 		possible = i;
 	}
 
-	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
+	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
 		possible, max_t(int, possible - num_processors, 0));
 
 	for (i = 0; i < possible; i++)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 05b31d92f69c..b481341c9369 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -9,6 +9,9 @@
 /*
  * Handle hardware traps and faults.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/interrupt.h>
 #include <linux/kallsyms.h>
 #include <linux/spinlock.h>
@@ -143,12 +146,11 @@ trap_signal:
 #ifdef CONFIG_X86_64
 	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
 	    printk_ratelimit()) {
-		printk(KERN_INFO
-		       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
-		       tsk->comm, tsk->pid, str,
-		       regs->ip, regs->sp, error_code);
+		pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
+			tsk->comm, tsk->pid, str,
+			regs->ip, regs->sp, error_code);
 		print_vma_addr(" in ", regs->ip);
-		printk("\n");
+		pr_cont("\n");
 	}
 #endif
 
@@ -269,12 +271,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
 	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 			printk_ratelimit()) {
-		printk(KERN_INFO
-			"%s[%d] general protection ip:%lx sp:%lx error:%lx",
+		pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",
 			tsk->comm, task_pid_nr(tsk),
 			regs->ip, regs->sp, error_code);
 		print_vma_addr(" in ", regs->ip);
-		printk("\n");
+		pr_cont("\n");
 	}
 
 	force_sig(SIGSEGV, tsk);
@@ -570,7 +571,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 	conditional_sti(regs);
 #if 0
 	/* No need to warn about this any longer. */
-	printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
+	pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
 #endif
 }
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index fc0a147e3727..cfa5d4f7ca56 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/init.h>
@@ -84,8 +86,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
 #ifdef CONFIG_X86_TSC
 int __init notsc_setup(char *str)
 {
-	printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
-			"cannot disable TSC completely.\n");
+	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
 	tsc_disabled = 1;
 	return 1;
 }
@@ -373,7 +374,7 @@ static unsigned long quick_pit_calibrate(void)
 			goto success;
 		}
 	}
-	printk("Fast TSC calibration failed\n");
+	pr_err("Fast TSC calibration failed\n");
 	return 0;
 
 success:
@@ -392,7 +393,7 @@ success:
 	 */
 	delta *= PIT_TICK_RATE;
 	do_div(delta, i*256*1000);
-	printk("Fast TSC calibration using PIT\n");
+	pr_info("Fast TSC calibration using PIT\n");
 	return delta;
 }
 
@@ -487,9 +488,8 @@ unsigned long native_calibrate_tsc(void)
 		 * use the reference value, as it is more precise.
 		 */
 		if (delta >= 90 && delta <= 110) {
-			printk(KERN_INFO
-			       "TSC: PIT calibration matches %s. %d loops\n",
-			       hpet ? "HPET" : "PMTIMER", i + 1);
+			pr_info("PIT calibration matches %s. %d loops\n",
+				hpet ? "HPET" : "PMTIMER", i + 1);
 			return tsc_ref_min;
 		}
 
@@ -511,38 +511,36 @@ unsigned long native_calibrate_tsc(void)
 	 */
 	if (tsc_pit_min == ULONG_MAX) {
 		/* PIT gave no useful value */
-		printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n");
+		pr_warn("Unable to calibrate against PIT\n");
 
 		/* We don't have an alternative source, disable TSC */
 		if (!hpet && !ref1 && !ref2) {
-			printk("TSC: No reference (HPET/PMTIMER) available\n");
+			pr_notice("No reference (HPET/PMTIMER) available\n");
 			return 0;
 		}
 
 		/* The alternative source failed as well, disable TSC */
 		if (tsc_ref_min == ULONG_MAX) {
-			printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
-			       "failed.\n");
+			pr_warn("HPET/PMTIMER calibration failed\n");
 			return 0;
 		}
 
 		/* Use the alternative source */
-		printk(KERN_INFO "TSC: using %s reference calibration\n",
-		       hpet ? "HPET" : "PMTIMER");
+		pr_info("using %s reference calibration\n",
+			hpet ? "HPET" : "PMTIMER");
 
 		return tsc_ref_min;
 	}
 
 	/* We don't have an alternative source, use the PIT calibration value */
 	if (!hpet && !ref1 && !ref2) {
-		printk(KERN_INFO "TSC: Using PIT calibration value\n");
+		pr_info("Using PIT calibration value\n");
 		return tsc_pit_min;
 	}
 
 	/* The alternative source failed, use the PIT calibration value */
 	if (tsc_ref_min == ULONG_MAX) {
-		printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. "
-		       "Using PIT calibration\n");
+		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
 		return tsc_pit_min;
 	}
 
@@ -551,9 +549,9 @@ unsigned long native_calibrate_tsc(void)
 	 * the PIT value as we know that there are PMTIMERs around
 	 * running at double speed. At least we let the user know:
 	 */
-	printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
-	       hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
-	printk(KERN_INFO "TSC: Using PIT calibration value\n");
+	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
+		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
+	pr_info("Using PIT calibration value\n");
 	return tsc_pit_min;
 }
 
@@ -785,7 +783,7 @@ void mark_tsc_unstable(char *reason)
 		tsc_unstable = 1;
 		sched_clock_stable = 0;
 		disable_sched_clock_irqtime();
-		printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
+		pr_info("Marking TSC unstable due to %s\n", reason);
 		/* Change only the rating, when not registered */
 		if (clocksource_tsc.mult)
 			clocksource_mark_unstable(&clocksource_tsc);
@@ -912,9 +910,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
 		goto out;
 
 	tsc_khz = freq;
-	printk(KERN_INFO "Refined TSC clocksource calibration: "
-		"%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
-					(unsigned long)tsc_khz % 1000);
+	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
+		(unsigned long)tsc_khz / 1000,
+		(unsigned long)tsc_khz % 1000);
 
 out:
 	clocksource_register_khz(&clocksource_tsc, tsc_khz);
@@ -970,9 +968,9 @@ void __init tsc_init(void)
 		return;
 	}
 
-	printk("Detected %lu.%03lu MHz processor.\n",
-			(unsigned long)cpu_khz / 1000,
-			(unsigned long)cpu_khz % 1000);
+	pr_info("Detected %lu.%03lu MHz processor\n",
+		(unsigned long)cpu_khz / 1000,
+		(unsigned long)cpu_khz % 1000);
 
 	/*
 	 * Secondary CPUs do not run through tsc_init(), so set up
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 255f58ae71e8..54abcc0baf23 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -28,6 +28,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
@@ -137,14 +139,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
 	local_irq_enable();
 
 	if (!current->thread.vm86_info) {
-		printk("no vm86_info: BAD\n");
+		pr_alert("no vm86_info: BAD\n");
 		do_exit(SIGSEGV);
 	}
 	set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);
 	tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs);
 	tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap);
 	if (tmp) {
-		printk("vm86: could not access userspace vm86_info\n");
+		pr_alert("could not access userspace vm86_info\n");
 		do_exit(SIGSEGV);
 	}
 
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 7515cf0e1805..acdc125ad44e 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,6 +18,8 @@
  *  use the vDSO.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -111,18 +113,13 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
 static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
 			      const char *message)
 {
-	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
-	struct task_struct *tsk;
-
-	if (!show_unhandled_signals || !__ratelimit(&rs))
+	if (!show_unhandled_signals)
 		return;
 
-	tsk = current;
-
-	printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
-	       level, tsk->comm, task_pid_nr(tsk),
-	       message, regs->ip, regs->cs,
-	       regs->sp, regs->ax, regs->si, regs->di);
+	pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
+			      level, current->comm, task_pid_nr(current),
+			      message, regs->ip, regs->cs,
+			      regs->sp, regs->ax, regs->si, regs->di);
 }
 
 static int addr_to_vsyscall_nr(unsigned long addr)
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index bd18149b2b0f..3d3e20709119 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -3,6 +3,9 @@
  *
  * Author: Suresh Siddha <suresh.b.siddha@intel.com>
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/bootmem.h>
 #include <linux/compat.h>
 #include <asm/i387.h>
@@ -162,7 +165,7 @@ int save_i387_xstate(void __user *buf)
 	BUG_ON(sig_xstate_size < xstate_size);
 
 	if ((unsigned long)buf % 64)
-		printk("save_i387_xstate: bad fpstate %p\n", buf);
+		pr_err("%s: bad fpstate %p\n", __func__, buf);
 
 	if (!used_math())
 		return 0;
@@ -422,7 +425,7 @@ static void __init xstate_enable_boot_cpu(void)
 	pcntxt_mask = eax + ((u64)edx << 32);
 
 	if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
-		printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
+		pr_err("FP/SSE not shown under xsave features 0x%llx\n",
 		       pcntxt_mask);
 		BUG();
 	}
@@ -445,9 +448,8 @@ static void __init xstate_enable_boot_cpu(void)
 
 	setup_xstate_init();
 
-	printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
-	       "cntxt size 0x%x\n",
-	       pcntxt_mask, xstate_size);
+	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
+		pcntxt_mask, xstate_size);
 }
 
 /*
-- 
cgit v1.2.3


From 332afa656e76458ee9cf0f0d123016a0658539e4 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 21 May 2012 16:58:01 -0700
Subject: x86/irq: Update irq_cfg domain unless the new affinity is a subset of
 the current domain

Until now, irq_cfg domain is mostly static. Either all CPU's
(used by flat mode) or one CPU (first CPU in the irq afffinity
mask) to which irq is being migrated (this is used by the rest
of apic modes).

Upcoming x2apic cluster mode optimization patch allows the irq
to be sent to any CPU in the x2apic cluster (if supported by the
HW). So irq_cfg domain changes on the fly (depending on which
CPU in the x2apic cluster is online).

Instead of checking for any intersection between the new irq
affinity mask and the current irq_cfg domain, check if the new
irq affinity mask is a subset of the current irq_cfg domain.
Otherwise proceed with updating the irq_cfg domain aswell as
assigning vector's on all the CPUs specified in the new mask.

This also cleans up a workaround in updating irq_cfg domain for
legacy irq's that are handled by the IO-APIC.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: yinghai@kernel.org
Cc: gorcunov@openvz.org
Cc: agordeev@redhat.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1337644682-19854-1-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/io_apic.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ac96561d1a99..910a3118438b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1126,8 +1126,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 	old_vector = cfg->vector;
 	if (old_vector) {
 		cpumask_and(tmp_mask, mask, cpu_online_mask);
-		cpumask_and(tmp_mask, cfg->domain, tmp_mask);
-		if (!cpumask_empty(tmp_mask)) {
+		if (cpumask_subset(tmp_mask, cfg->domain)) {
 			free_cpumask_var(tmp_mask);
 			return 0;
 		}
@@ -1141,6 +1140,11 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 
 		apic->vector_allocation_domain(cpu, tmp_mask);
 
+		if (cpumask_subset(tmp_mask, cfg->domain)) {
+			free_cpumask_var(tmp_mask);
+			return 0;
+		}
+
 		vector = current_vector;
 		offset = current_offset;
 next:
@@ -1346,13 +1350,6 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 
 	if (!IO_APIC_IRQ(irq))
 		return;
-	/*
-	 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
-	 * controllers like 8259. Now that IO-APIC can handle this irq, update
-	 * the cfg->domain.
-	 */
-	if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
-		apic->vector_allocation_domain(0, cfg->domain);
 
 	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
 		return;
-- 
cgit v1.2.3


From 0b8255e660a0c229ebfe8f9fde12a8d4d34c50e0 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 21 May 2012 16:58:02 -0700
Subject: x86/x2apic/cluster: Use all the members of one cluster specified in
 the smp_affinity mask for the interrupt destination

If the HW implements round-robin interrupt delivery, this
enables multiple cpu's (which are part of the user specified
interrupt smp_affinity mask and belong to the same x2apic
cluster) to service the interrupt.

Also if the platform supports Power Aware Interrupt Routing,
then this enables the interrupt to be routed to an idle cpu or a
busy cpu depending on the perf/power bias tunable.

We are now grouping all the cpu's in a cluster to one vector
domain. So that will limit the total number of interrupt sources
handled by Linux. Previously we support "cpu-count *
available-vectors-per-cpu" interrupt sources but this will now
reduce to "cpu-count/16 * available-vectors-per-cpu".

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: yinghai@kernel.org
Cc: gorcunov@openvz.org
Cc: agordeev@redhat.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1337644682-19854-2-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/x2apic.h         |  9 ------
 arch/x86/kernel/apic/x2apic_cluster.c | 56 ++++++++++++++++++++++++-----------
 arch/x86/kernel/apic/x2apic_phys.c    |  9 ++++++
 3 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h
index 92e54abf89e0..7a5a832a99b6 100644
--- a/arch/x86/include/asm/x2apic.h
+++ b/arch/x86/include/asm/x2apic.h
@@ -28,15 +28,6 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-/*
- * For now each logical cpu is in its own vector allocation domain.
- */
-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static void
 __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
 {
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index ff35cff0e1a7..90d999c7f2ea 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -98,34 +98,47 @@ static void x2apic_send_IPI_all(int vector)
 
 static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	/*
-	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
-	 * May as well be the first.
-	 */
 	int cpu = cpumask_first(cpumask);
+	u32 dest = 0;
+	int i;
 
-	if ((unsigned)cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_logical_apicid, cpu);
-	else
+	if (cpu > nr_cpu_ids)
 		return BAD_APICID;
+
+	for_each_cpu_and(i, cpumask, per_cpu(cpus_in_cluster, cpu))
+		dest |= per_cpu(x86_cpu_to_logical_apicid, i);
+
+	return dest;
 }
 
 static unsigned int
 x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			      const struct cpumask *andmask)
 {
-	int cpu;
+	u32 dest = 0;
+	u16 cluster;
+	int i;
 
-	/*
-	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
+	for_each_cpu_and(i, cpumask, andmask) {
+		if (!cpumask_test_cpu(i, cpu_online_mask))
+			continue;
+		dest = per_cpu(x86_cpu_to_logical_apicid, i);
+		cluster = x2apic_cluster(i);
+		break;
 	}
 
-	return per_cpu(x86_cpu_to_logical_apicid, cpu);
+	if (!dest)
+		return BAD_APICID;
+
+	for_each_cpu_and(i, cpumask, andmask) {
+		if (!cpumask_test_cpu(i, cpu_online_mask))
+			continue;
+		if (cluster != x2apic_cluster(i))
+			continue;
+		dest |= per_cpu(x86_cpu_to_logical_apicid, i);
+	}
+
+	return dest;
 }
 
 static void init_x2apic_ldr(void)
@@ -208,6 +221,15 @@ static int x2apic_cluster_probe(void)
 		return 0;
 }
 
+/*
+ * Each x2apic cluster is an allocation domain.
+ */
+static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+	cpumask_clear(retmask);
+	cpumask_copy(retmask, per_cpu(cpus_in_cluster, cpu));
+}
+
 static struct apic apic_x2apic_cluster = {
 
 	.name				= "cluster x2apic",
@@ -225,7 +247,7 @@ static struct apic apic_x2apic_cluster = {
 	.check_apicid_used		= NULL,
 	.check_apicid_present		= NULL,
 
-	.vector_allocation_domain	= x2apic_vector_allocation_domain,
+	.vector_allocation_domain	= cluster_vector_allocation_domain,
 	.init_apic_ldr			= init_x2apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index c17e982db275..93b25706f177 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -120,6 +120,15 @@ static int x2apic_phys_probe(void)
 	return apic == &apic_x2apic_phys;
 }
 
+/*
+ * Each logical cpu is in its own vector allocation domain.
+ */
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
+}
+
 static struct apic apic_x2apic_phys = {
 
 	.name				= "physical x2apic",
-- 
cgit v1.2.3


From 49d0c7a0a425a89190b7c3b1445faba9eb227bec Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Tue, 5 Jun 2012 13:23:15 +0200
Subject: x86/apic: Trivial whitespace fixes

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120605112310.GA11443@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/apic_flat_64.c   | 2 +-
 arch/x86/kernel/apic/io_apic.c        | 4 ++--
 arch/x86/kernel/apic/x2apic_cluster.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 0e881c46e8c8..de279b32ceb1 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -92,7 +92,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 }
 
 static void
- flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
+flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
 {
 	unsigned long mask = cpumask_bits(cpumask)[0];
 	int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 910a3118438b..74c569791e75 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1363,7 +1363,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 		    cfg->vector, irq, attr->trigger, attr->polarity, dest);
 
 	if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
-		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+		pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
 		__clear_irq_vector(irq, cfg);
 
@@ -1466,7 +1466,7 @@ void setup_IO_APIC_irq_extra(u32 gsi)
  * Set up the timer pin, possibly with the 8259A-master behind.
  */
 static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
-					 unsigned int pin, int vector)
+					unsigned int pin, int vector)
 {
 	struct IO_APIC_route_entry entry;
 
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 90d999c7f2ea..2919e45d30c3 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -81,7 +81,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 }
 
 static void
- x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
 {
 	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
 }
-- 
cgit v1.2.3


From bf721d3a3bc7a731add45c8078b142b494ab413e Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Tue, 5 Jun 2012 13:23:29 +0200
Subject: x86/apic: Factor out default target_cpus() operation

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120605112324.GA11449@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/apic.h           |  5 +++++
 arch/x86/include/asm/x2apic.h         |  9 ---------
 arch/x86/kernel/apic/apic_flat_64.c   | 14 ++------------
 arch/x86/kernel/apic/apic_numachip.c  |  7 +------
 arch/x86/kernel/apic/bigsmp_32.c      | 11 +----------
 arch/x86/kernel/apic/x2apic_cluster.c |  2 +-
 arch/x86/kernel/apic/x2apic_phys.c    |  2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c    |  7 +------
 8 files changed, 12 insertions(+), 45 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index eaff4790ed96..fc38195d6405 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -537,6 +537,11 @@ static inline const struct cpumask *default_target_cpus(void)
 #endif
 }
 
+static inline const struct cpumask *online_target_cpus(void)
+{
+	return cpu_online_mask;
+}
+
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
 
 
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h
index 7a5a832a99b6..f90f0a587c66 100644
--- a/arch/x86/include/asm/x2apic.h
+++ b/arch/x86/include/asm/x2apic.h
@@ -9,15 +9,6 @@
 #include <asm/ipi.h>
 #include <linux/cpumask.h>
 
-/*
- * Need to use more than cpu 0, because we need more vectors
- * when MSI-X are used.
- */
-static const struct cpumask *x2apic_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
 static int x2apic_apic_id_valid(int apicid)
 {
 	return 1;
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index de279b32ceb1..61ac1afeff07 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -36,11 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
 
-static const struct cpumask *flat_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
 static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
@@ -186,7 +181,7 @@ static struct apic apic_flat =  {
 	.irq_delivery_mode		= dest_LowestPrio,
 	.irq_dest_mode			= 1, /* logical */
 
-	.target_cpus			= flat_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
@@ -262,11 +257,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static const struct cpumask *physflat_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
 static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	cpumask_clear(retmask);
@@ -345,7 +335,7 @@ static struct apic apic_physflat =  {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= physflat_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 6ec6d5d297c3..3255a60fcc95 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -72,11 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
 	return initial_apic_id >> index_msb;
 }
 
-static const struct cpumask *numachip_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
 static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	cpumask_clear(retmask);
@@ -253,7 +248,7 @@ static struct apic apic_numachip __refconst = {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= numachip_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 31fbdbfbf960..c288e81e00ff 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void)
 	return 1;
 }
 
-static const struct cpumask *bigsmp_target_cpus(void)
-{
-#ifdef CONFIG_SMP
-	return cpu_online_mask;
-#else
-	return cpumask_of(0);
-#endif
-}
-
 static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
 {
 	return 0;
@@ -205,7 +196,7 @@ static struct apic apic_bigsmp = {
 	/* phys delivery to target CPU: */
 	.irq_dest_mode			= 0,
 
-	.target_cpus			= bigsmp_target_cpus,
+	.target_cpus			= default_target_cpus,
 	.disable_esr			= 1,
 	.dest_logical			= 0,
 	.check_apicid_used		= bigsmp_check_apicid_used,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 2919e45d30c3..612622c47dfb 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -241,7 +241,7 @@ static struct apic apic_x2apic_cluster = {
 	.irq_delivery_mode		= dest_LowestPrio,
 	.irq_dest_mode			= 1, /* logical */
 
-	.target_cpus			= x2apic_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 93b25706f177..b1a8b39e3c3f 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -140,7 +140,7 @@ static struct apic apic_x2apic_phys = {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= x2apic_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index c6d03f7a4401..16efb92bfea5 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -185,11 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
 unsigned long sn_rtc_cycles_per_second;
 EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
-static const struct cpumask *uv_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
 static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	cpumask_clear(retmask);
@@ -362,7 +357,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= uv_target_cpus,
+	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
-- 
cgit v1.2.3


From 6398268d2bc454735f11e08705e858f9fdf5c750 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Tue, 5 Jun 2012 13:23:44 +0200
Subject: x86/apic: Factor out default cpu_mask_to_apicid() operations

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120605112340.GA11454@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/apic.h          | 13 +++++++++---
 arch/x86/kernel/apic/apic.c          | 28 +++++++++++++++++++++++++
 arch/x86/kernel/apic/apic_flat_64.c  | 40 ++++--------------------------------
 arch/x86/kernel/apic/apic_noop.c     |  4 ++--
 arch/x86/kernel/apic/apic_numachip.c | 36 ++------------------------------
 arch/x86/kernel/apic/bigsmp_32.c     | 30 ++-------------------------
 arch/x86/kernel/apic/probe_32.c      |  4 ++--
 arch/x86/kernel/apic/x2apic_phys.c   | 36 ++------------------------------
 8 files changed, 52 insertions(+), 139 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index fc38195d6405..bef571769e68 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -592,14 +592,14 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 #endif
 
 static inline unsigned int
-default_cpu_mask_to_apicid(const struct cpumask *cpumask)
+flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
 }
 
 static inline unsigned int
-default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			       const struct cpumask *andmask)
+flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+			    const struct cpumask *andmask)
 {
 	unsigned long mask1 = cpumask_bits(cpumask)[0];
 	unsigned long mask2 = cpumask_bits(andmask)[0];
@@ -608,6 +608,13 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	return (unsigned int)(mask1 & mask2 & mask3);
 }
 
+extern unsigned int
+default_cpu_mask_to_apicid(const struct cpumask *cpumask);
+
+extern unsigned int
+default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+			       const struct cpumask *andmask);
+
 static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
 {
 	return physid_isset(apicid, *map);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 39a222e094af..96a2608252f1 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2123,6 +2123,34 @@ void default_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
+unsigned int default_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = cpumask_first(cpumask);
+	if (likely((unsigned)cpu < nr_cpu_ids))
+		return per_cpu(x86_cpu_to_apicid, cpu);
+
+	return BAD_APICID;
+}
+
+unsigned int
+default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+			       const struct cpumask *andmask)
+{
+	int cpu;
+
+	for_each_cpu_and(cpu, cpumask, andmask) {
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	}
+	return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
 /*
  * Power management
  */
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 61ac1afeff07..55b97ce4fa19 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -205,8 +205,8 @@ static struct apic apic_flat =  {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xFFu << 24,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= flat_send_IPI_mask,
 	.send_IPI_mask_allbutself	= flat_send_IPI_mask_allbutself,
@@ -284,38 +284,6 @@ static void physflat_send_IPI_all(int vector)
 	physflat_send_IPI_mask(cpu_online_mask, vector);
 }
 
-static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	cpu = cpumask_first(cpumask);
-	if ((unsigned)cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_apicid, cpu);
-	else
-		return BAD_APICID;
-}
-
-static unsigned int
-physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-				const struct cpumask *andmask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
-	}
-	return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
 static int physflat_probe(void)
 {
 	if (apic == &apic_physflat || num_possible_cpus() > 8)
@@ -360,8 +328,8 @@ static struct apic apic_physflat =  {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xFFu << 24,
 
-	.cpu_mask_to_apicid		= physflat_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= physflat_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= physflat_send_IPI_mask,
 	.send_IPI_mask_allbutself	= physflat_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index a6e4c6e06c08..7c3dd4fe0686 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -159,8 +159,8 @@ struct apic apic_noop = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0x0F << 24,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= noop_send_IPI_mask,
 	.send_IPI_mask_allbutself	= noop_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 3255a60fcc95..dba4bf2ed566 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -152,38 +152,6 @@ static void numachip_send_IPI_self(int vector)
 	__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
 }
 
-static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	cpu = cpumask_first(cpumask);
-	if (likely((unsigned)cpu < nr_cpu_ids))
-		return per_cpu(x86_cpu_to_apicid, cpu);
-
-	return BAD_APICID;
-}
-
-static unsigned int
-numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-				const struct cpumask *andmask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
-	}
-	return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
 static int __init numachip_probe(void)
 {
 	return apic == &apic_numachip;
@@ -272,8 +240,8 @@ static struct apic apic_numachip __refconst = {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xffU << 24,
 
-	.cpu_mask_to_apicid		= numachip_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= numachip_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= numachip_send_IPI_mask,
 	.send_IPI_mask_allbutself	= numachip_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index c288e81e00ff..907aa3d112a6 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -96,32 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
 	return 1;
 }
 
-/* As we are using single CPU as destination, pick only one CPU here */
-static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	int cpu = cpumask_first(cpumask);
-
-	if (cpu < nr_cpu_ids)
-		return cpu_physical_id(cpu);
-	return BAD_APICID;
-}
-
-static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			      const struct cpumask *andmask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			return cpu_physical_id(cpu);
-	}
-	return BAD_APICID;
-}
-
 static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
@@ -220,8 +194,8 @@ static struct apic apic_bigsmp = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0xFF << 24,
 
-	.cpu_mask_to_apicid		= bigsmp_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= bigsmp_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= bigsmp_send_IPI_mask,
 	.send_IPI_mask_allbutself	= NULL,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1b291da09e60..71b6ac48ab26 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -123,8 +123,8 @@ static struct apic apic_default = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0x0F << 24,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= default_send_IPI_mask_logical,
 	.send_IPI_mask_allbutself	= default_send_IPI_mask_allbutself_logical,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index b1a8b39e3c3f..f730269edef2 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -76,38 +76,6 @@ static void x2apic_send_IPI_all(int vector)
 	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	int cpu = cpumask_first(cpumask);
-
-	if ((unsigned)cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_apicid, cpu);
-	else
-		return BAD_APICID;
-}
-
-static unsigned int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			      const struct cpumask *andmask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
-	}
-
-	return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
 static void init_x2apic_ldr(void)
 {
 }
@@ -164,8 +132,8 @@ static struct apic apic_x2apic_phys = {
 	.set_apic_id			= x2apic_set_apic_id,
 	.apic_id_mask			= 0xFFFFFFFFu,
 
-	.cpu_mask_to_apicid		= x2apic_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= x2apic_send_IPI_mask,
 	.send_IPI_mask_allbutself	= x2apic_send_IPI_mask_allbutself,
-- 
cgit v1.2.3


From fbd24153c48b8425b09c161a020483cd77da870e Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkhan@gmail.com>
Date: Wed, 30 May 2012 18:40:03 -0600
Subject: x86/early_printk: Replace obsolete simple_strtoul() usage with
 kstrtoint()

Change early_serial_init() to call kstrtoul() instead of calling
obsoleted simple_strtoul().

Signed-off-by: Shuah Khan <shuahkhan@gmail.com>
Cc: Joe Perches <joe@perches.com>
Link: http://lkml.kernel.org/r/1338424803.3569.5.camel@lorien2
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/early_printk.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 9b9f18b49918..5e4771266f1a 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -119,7 +119,7 @@ static __init void early_serial_init(char *s)
 	unsigned char c;
 	unsigned divisor;
 	unsigned baud = DEFAULT_BAUD;
-	char *e;
+	ssize_t ret;
 
 	if (*s == ',')
 		++s;
@@ -127,14 +127,14 @@ static __init void early_serial_init(char *s)
 	if (*s) {
 		unsigned port;
 		if (!strncmp(s, "0x", 2)) {
-			early_serial_base = simple_strtoul(s, &e, 16);
+			ret = kstrtoint(s, 16, &early_serial_base);
 		} else {
 			static const int __initconst bases[] = { 0x3f8, 0x2f8 };
 
 			if (!strncmp(s, "ttyS", 4))
 				s += 4;
-			port = simple_strtoul(s, &e, 10);
-			if (port > 1 || s == e)
+			ret = kstrtouint(s, 10, &port);
+			if (ret || port > 1)
 				port = 0;
 			early_serial_base = bases[port];
 		}
@@ -149,8 +149,8 @@ static __init void early_serial_init(char *s)
 	outb(0x3, early_serial_base + MCR);	/* DTR + RTS */
 
 	if (*s) {
-		baud = simple_strtoul(s, &e, 0);
-		if (baud == 0 || s == e)
+		ret = kstrtouint(s, 0, &baud);
+		if (ret || baud == 0)
 			baud = DEFAULT_BAUD;
 	}
 
-- 
cgit v1.2.3


From c00b275043adc14d668f36266b890f0c53d46640 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 29 May 2012 21:27:44 +0200
Subject: uprobes: Optimize is_swbp_at_addr() for current->mm

Change is_swbp_at_addr() to try to avoid the costly
read_opcode() if mm == current->mm, __copy_from_user_inatomic()
should succeed in the likely case.

Currently this optimization is not important, but we are going
to add more is_swbp_at_addr(current->mm) callers.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120529192744.GA8057@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 985be4d80fe8..d0f5ec0dcdea 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -333,10 +333,20 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	uprobe_opcode_t opcode;
 	int result;
 
+	if (current->mm == mm) {
+		pagefault_disable();
+		result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
+								sizeof(opcode));
+		pagefault_enable();
+
+		if (likely(result == 0))
+			goto out;
+	}
+
 	result = read_opcode(mm, vaddr, &opcode);
 	if (result)
 		return result;
-
+out:
 	if (is_swbp_insn(&opcode))
 		return 1;
 
-- 
cgit v1.2.3


From a3d7bb47937b3a40b9f0c75655e97b3bb6407cbe Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 29 May 2012 21:27:59 +0200
Subject: uprobes: Change read_opcode() to use FOLL_FORCE

set_orig_insn()->read_opcode() should not fail if the probed
task did mprotect() after uprobe_register(), change it to use
FOLL_FORCE. Without FOLL_WRITE this doesn't have any "side"
effect but allows to read the !VM_READ memory.

There is another reason for this change, we are going to use
is_swbp_at_addr() from handle_swbp() which can race with another
thread doing mprotect().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120529192759.GB8057@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index d0f5ec0dcdea..a0dbc87a2ec6 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -312,7 +312,7 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_
 	void *vaddr_new;
 	int ret;
 
-	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
+	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
 	if (ret <= 0)
 		return ret;
 
-- 
cgit v1.2.3


From 3a9ea0520f38def4a3915b91f82455b749f07d88 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 29 May 2012 21:28:57 +0200
Subject: uprobes: Introduce find_active_uprobe() helper

No functional changes. Move the "find uprobe" code from
handle_swbp() to the new helper, find_active_uprobe().

Note: with or without this change, the find-active-uprobe logic
is not exactly right. We can race with another thread which
unmaps the memory with the valid uprobe before we take
mm->mmap_sem. We can't find this uprobe simply because
find_vma() fails. In this case we wrongly assume that this trap
was not caused by uprobe and send the erroneous SIGTRAP. See the
next changes.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120529192857.GC8057@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 47 ++++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a0dbc87a2ec6..eaf4d55fd424 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1489,38 +1489,47 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
 	return false;
 }
 
-/*
- * Run handler and ask thread to singlestep.
- * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
- */
-static void handle_swbp(struct pt_regs *regs)
+static struct uprobe *find_active_uprobe(unsigned long bp_vaddr)
 {
+	struct mm_struct *mm = current->mm;
+	struct uprobe *uprobe = NULL;
 	struct vm_area_struct *vma;
-	struct uprobe_task *utask;
-	struct uprobe *uprobe;
-	struct mm_struct *mm;
-	unsigned long bp_vaddr;
 
-	uprobe = NULL;
-	bp_vaddr = uprobe_get_swbp_addr(regs);
-	mm = current->mm;
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, bp_vaddr);
 
-	if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) {
-		struct inode *inode;
-		loff_t offset;
+	if (vma && vma->vm_start <= bp_vaddr) {
+		if (valid_vma(vma, false)) {
+			struct inode *inode;
+			loff_t offset;
 
-		inode = vma->vm_file->f_mapping->host;
-		offset = bp_vaddr - vma->vm_start;
-		offset += (vma->vm_pgoff << PAGE_SHIFT);
-		uprobe = find_uprobe(inode, offset);
+			inode = vma->vm_file->f_mapping->host;
+			offset = bp_vaddr - vma->vm_start;
+			offset += (vma->vm_pgoff << PAGE_SHIFT);
+			uprobe = find_uprobe(inode, offset);
+		}
 	}
 
 	srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
 	current->uprobe_srcu_id = -1;
 	up_read(&mm->mmap_sem);
 
+	return uprobe;
+}
+
+/*
+ * Run handler and ask thread to singlestep.
+ * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
+ */
+static void handle_swbp(struct pt_regs *regs)
+{
+	struct uprobe_task *utask;
+	struct uprobe *uprobe;
+	unsigned long bp_vaddr;
+
+	bp_vaddr = uprobe_get_swbp_addr(regs);
+	uprobe = find_active_uprobe(bp_vaddr);
+
 	if (!uprobe) {
 		/* No matching uprobe; signal SIGTRAP. */
 		send_sig(SIGTRAP, current, 0);
-- 
cgit v1.2.3


From d790d34653ab20c74034902f5f0889bba807949a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 29 May 2012 21:29:14 +0200
Subject: uprobes: Teach find_active_uprobe() to provide the "is_swbp" info

A separate patch to simplify the review, and for the
documentation.

The patch adds another "int *is_swbp" argument to
find_active_uprobe(), so far its only caller doesn't use this
info.

With this patch find_active_uprobe() additionally does:

	- if find_vma() + ->vm_start check fails, *is_swbp = -EFAULT

	- otherwise, if valid_vma() + find_uprobe() fails, it holds
	  the result of is_swbp_at_addr(), can be negative too. The
	  latter is only possible if we raced with another thread
	  which did munmap/etc after we hit this bp.

IOW. If find_active_uprobe(&is_swbp) returns NULL, the caller
can look at is_swbp to figure out whether the current insn is bp
or not, or detect the race with another thread if it is
negative.

Note: I think that performance-wise this change is fine. This
adds is_swbp_at_addr(), but only if we raced with
uprobe_unregister() or if we hit the "normal" int3 but this mm
has uprobes as well. And even in this case the slow
read_opcode() path is very unlikely, this insn recently
triggered do_int3(), __copy_from_user_inatomic() shouldn't fail
in the likely case.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120529192914.GD8057@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index eaf4d55fd424..ee3df704e78a 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1489,7 +1489,7 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
 	return false;
 }
 
-static struct uprobe *find_active_uprobe(unsigned long bp_vaddr)
+static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 {
 	struct mm_struct *mm = current->mm;
 	struct uprobe *uprobe = NULL;
@@ -1497,7 +1497,6 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr)
 
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, bp_vaddr);
-
 	if (vma && vma->vm_start <= bp_vaddr) {
 		if (valid_vma(vma, false)) {
 			struct inode *inode;
@@ -1508,6 +1507,11 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr)
 			offset += (vma->vm_pgoff << PAGE_SHIFT);
 			uprobe = find_uprobe(inode, offset);
 		}
+
+		if (!uprobe)
+			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
+	} else {
+		*is_swbp = -EFAULT;
 	}
 
 	srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
@@ -1526,9 +1530,10 @@ static void handle_swbp(struct pt_regs *regs)
 	struct uprobe_task *utask;
 	struct uprobe *uprobe;
 	unsigned long bp_vaddr;
+	int is_swbp;
 
 	bp_vaddr = uprobe_get_swbp_addr(regs);
-	uprobe = find_active_uprobe(bp_vaddr);
+	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 
 	if (!uprobe) {
 		/* No matching uprobe; signal SIGTRAP. */
-- 
cgit v1.2.3


From 77fc4af1b59d12ab3b1467adf0a5204806853123 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 29 May 2012 21:29:28 +0200
Subject: uprobes: Change register_for_each_vma() to take mm->mmap_sem for
 writing

Change register_for_each_vma() to take mm->mmap_sem for writing.
This is a bit unfortunate but hopefully not too bad, this is the
slow path anyway.

This is needed to ensure that find_active_uprobe() can not race
with uprobe_register() which adds the new bp at the same
bp_vaddr, after find_uprobe() fails and before
is_swbp_at_addr_fast() checks the memory.

IOW, this is needed to ensure that if find_active_uprobe()
returns NULL but is_swbp == true, we can safely assume that it
was the "normal" int3 and we should send SIGTRAP.

There is another reason for this change. We are going to replace
uprobes_state->count with MMF_ flags set by register/unregister
and cleared by find_active_uprobe(), and set/clear shouldn't
race with each other.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120529192928.GE8057@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ee3df704e78a..a2ed82b4808c 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -853,12 +853,12 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 		}
 
 		mm = vi->mm;
-		down_read(&mm->mmap_sem);
+		down_write(&mm->mmap_sem);
 		vma = find_vma(mm, (unsigned long)vi->vaddr);
 		if (!vma || !valid_vma(vma, is_register)) {
 			list_del(&vi->probe_list);
 			kfree(vi);
-			up_read(&mm->mmap_sem);
+			up_write(&mm->mmap_sem);
 			mmput(mm);
 			continue;
 		}
@@ -867,7 +867,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 						vaddr != vi->vaddr) {
 			list_del(&vi->probe_list);
 			kfree(vi);
-			up_read(&mm->mmap_sem);
+			up_write(&mm->mmap_sem);
 			mmput(mm);
 			continue;
 		}
@@ -877,7 +877,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 		else
 			remove_breakpoint(uprobe, mm, vi->vaddr);
 
-		up_read(&mm->mmap_sem);
+		up_write(&mm->mmap_sem);
 		mmput(mm);
 		if (is_register) {
 			if (ret && ret == -EEXIST)
-- 
cgit v1.2.3


From 56bb4cf6475d702d2fb00fc641aa6441097c0330 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 29 May 2012 21:29:47 +0200
Subject: uprobes: Teach handle_swbp() to rely on "is_swbp" rather than
 uprobes_srcu

Currently handle_swbp() assumes that it can't race with
unregister, so it roughly does:

	if (find_uprobe(vaddr))
		process_uprobe();
	else
		send_sig(SIGTRAP);

This relies on the not-really-working uprobes_srcu code we are
going to remove, see the next patch.

With this patch we rely on the result of
is_swbp_at_addr(bp_vaddr) if find_uprobe() fails.

If is_swbp == 1, then we hit the normal int3, we should send
SIGTRAP.

If is_swbp == 0, we raced with uprobe_unregister(), we simply
restart this insn again.

The "difficult" case is is_swbp == -EFAULT, when we can't read
this memory. In this case I think we should restart too, and
this is more correct compared to the current code which sends
SIGTRAP.

Ignoring ENOMEM/etc from get_user_pages(), this can only happen
if another thread unmaps this memory before find_active_uprobe()
takes mmap_sem. It would be better to pretend it was unmapped
before this insn was executed, restart, and get SIGSEGV.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120529192947.GF8057@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a2ed82b4808c..1f02e3bbfc1d 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1530,14 +1530,26 @@ static void handle_swbp(struct pt_regs *regs)
 	struct uprobe_task *utask;
 	struct uprobe *uprobe;
 	unsigned long bp_vaddr;
-	int is_swbp;
+	int uninitialized_var(is_swbp);
 
 	bp_vaddr = uprobe_get_swbp_addr(regs);
 	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 
 	if (!uprobe) {
-		/* No matching uprobe; signal SIGTRAP. */
-		send_sig(SIGTRAP, current, 0);
+		if (is_swbp > 0) {
+			/* No matching uprobe; signal SIGTRAP. */
+			send_sig(SIGTRAP, current, 0);
+		} else {
+			/*
+			 * Either we raced with uprobe_unregister() or we can't
+			 * access this memory. The latter is only possible if
+			 * another thread plays with our ->mm. In both cases
+			 * we can simply restart. If this vma was unmapped we
+			 * can pretend this insn was not executed yet and get
+			 * the (correct) SIGSEGV after restart.
+			 */
+			instruction_pointer_set(regs, bp_vaddr);
+		}
 		return;
 	}
 
-- 
cgit v1.2.3


From 778b032d96909690c19d84f8d17c13be65ed6f8e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 29 May 2012 21:30:08 +0200
Subject: uprobes: Kill uprobes_srcu/uprobe_srcu_id

Kill the no longer needed uprobes_srcu/uprobe_srcu_id code.

It doesn't really work anyway. synchronize_srcu() can only
synchronize with the code "inside" the
srcu_read_lock/srcu_read_unlock section, while
uprobe_pre_sstep_notifier() does srcu_read_lock() _after_ we
already hit the breakpoint.

I guess this probably works "in practice". synchronize_srcu() is
slow and it implies synchronize_sched(), and the probed task
enters the non- preemptible section at the start of exception
handler. Still this is not right at least in theory, and
task->uprobe_srcu_id blows task_struct.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120529193008.GG8057@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h   |  1 -
 kernel/events/uprobes.c | 22 +++-------------------
 2 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6029d8c54476..6bd19655c1a7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1569,7 +1569,6 @@ struct task_struct {
 #endif
 #ifdef CONFIG_UPROBES
 	struct uprobe_task *utask;
-	int uprobe_srcu_id;
 #endif
 };
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 1f02e3bbfc1d..8c5e043cd309 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -38,7 +38,6 @@
 #define UINSNS_PER_PAGE			(PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
 #define MAX_UPROBE_XOL_SLOTS		UINSNS_PER_PAGE
 
-static struct srcu_struct uprobes_srcu;
 static struct rb_root uprobes_tree = RB_ROOT;
 
 static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
@@ -738,20 +737,14 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
 }
 
 /*
- * There could be threads that have hit the breakpoint and are entering the
- * notifier code and trying to acquire the uprobes_treelock. The thread
- * calling delete_uprobe() that is removing the uprobe from the rb_tree can
- * race with these threads and might acquire the uprobes_treelock compared
- * to some of the breakpoint hit threads. In such a case, the breakpoint
- * hit threads will not find the uprobe. The current unregistering thread
- * waits till all other threads have hit a breakpoint, to acquire the
- * uprobes_treelock before the uprobe is removed from the rbtree.
+ * There could be threads that have already hit the breakpoint. They
+ * will recheck the current insn and restart if find_uprobe() fails.
+ * See find_active_uprobe().
  */
 static void delete_uprobe(struct uprobe *uprobe)
 {
 	unsigned long flags;
 
-	synchronize_srcu(&uprobes_srcu);
 	spin_lock_irqsave(&uprobes_treelock, flags);
 	rb_erase(&uprobe->rb_node, &uprobes_tree);
 	spin_unlock_irqrestore(&uprobes_treelock, flags);
@@ -1388,9 +1381,6 @@ void uprobe_free_utask(struct task_struct *t)
 {
 	struct uprobe_task *utask = t->utask;
 
-	if (t->uprobe_srcu_id != -1)
-		srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id);
-
 	if (!utask)
 		return;
 
@@ -1408,7 +1398,6 @@ void uprobe_free_utask(struct task_struct *t)
 void uprobe_copy_process(struct task_struct *t)
 {
 	t->utask = NULL;
-	t->uprobe_srcu_id = -1;
 }
 
 /*
@@ -1513,9 +1502,6 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	} else {
 		*is_swbp = -EFAULT;
 	}
-
-	srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
-	current->uprobe_srcu_id = -1;
 	up_read(&mm->mmap_sem);
 
 	return uprobe;
@@ -1656,7 +1642,6 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs)
 		utask->state = UTASK_BP_HIT;
 
 	set_thread_flag(TIF_UPROBE);
-	current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);
 
 	return 1;
 }
@@ -1691,7 +1676,6 @@ static int __init init_uprobes(void)
 		mutex_init(&uprobes_mutex[i]);
 		mutex_init(&uprobes_mmap_mutex[i]);
 	}
-	init_srcu_struct(&uprobes_srcu);
 
 	return register_die_notifier(&uprobe_exception_nb);
 }
-- 
cgit v1.2.3


From 5a425294ee7d4ab5a374248e85838dfd450caf75 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 5 Jun 2012 15:30:31 +0200
Subject: perf/x86: Fix Intel shared extra MSR allocation

Zheng Yan reported that event group validation can wreck event state
when Intel extra_reg allocation changes event state.

Validation shouldn't change any persistent state. Cloning events in
validate_{event,group}() isn't really pretty either, so add a few
special cases to avoid modifying the event state.

The code is restructured to minimize the special case impact.

Reported-by: Zheng Yan <zheng.z.yan@linux.intel.com>
Acked-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c       |  1 +
 arch/x86/kernel/cpu/perf_event.h       |  1 +
 arch/x86/kernel/cpu/perf_event_intel.c | 92 +++++++++++++++++++++++-----------
 3 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da0183..cb608383e4f6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf54493..83794d8e6af0 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..965baa2fa790 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];
-- 
cgit v1.2.3


From 0780c927a02492f917a74f51f3c801c76a637c57 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 5 Jun 2012 10:26:43 +0200
Subject: perf/x86: Implement cycles:p for SNB/IVB

Now that there's finally a chip with working PEBS (IvyBridge), we can
enable the hardware and implement cycles:p for SNB/IVB.

Cc: Stephane Eranian <eranian@google.com>
Requested-and-tested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h       |  1 +
 arch/x86/kernel/cpu/perf_event_intel.c | 50 ++++++++++++++++++++++++++++------
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 83794d8e6af0..7241e2fc3c17 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -365,6 +365,7 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+	void		(*pebs_aliases)(struct perf_event *event);
 
 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 965baa2fa790..2312c1ff1b19 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1336,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
-static int intel_pmu_hw_config(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
-
-	if (ret)
-		return ret;
-
-	if (event->attr.precise_ip &&
-	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
 		/*
 		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
 		 * (0x003c) so that we can use it with PEBS.
@@ -1365,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		 */
 		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
 
+		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+		event->hw.config = alt_config;
+	}
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+		/*
+		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+		 * (0x003c) so that we can use it with PEBS.
+		 *
+		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+		 * PEBS capable. However we can use UOPS_RETIRED.ALL
+		 * (0x01c2), which is a PEBS capable event, to get the same
+		 * count.
+		 *
+		 * UOPS_RETIRED.ALL counts the number of cycles that retires
+		 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+		 * larger than the maximum number of micro-ops that can be
+		 * retired per cycle (4) and then inverting the condition, we
+		 * count all cycles that retire 16 or less micro-ops, which
+		 * is every cycle.
+		 *
+		 * Thereby we gain a PEBS capable cycle counter.
+		 */
+		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
 
 		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
 		event->hw.config = alt_config;
 	}
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+		x86_pmu.pebs_aliases(event);
 
 	if (intel_pmu_needs_lbr_smpl(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
@@ -1643,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
 	.put_event_constraints	= intel_put_event_constraints,
+	.pebs_aliases		= intel_pebs_aliases_core2,
 
 	.format_attrs		= intel_arch3_formats_attr,
 
@@ -1885,6 +1918,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
-- 
cgit v1.2.3


From 47a8863dbb11745446314ca126593789ab74d93a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 5 Jun 2012 10:26:43 +0200
Subject: perf/x86: Enable/Add IvyBridge hardware support

Implement rudimentary IVB perf support. The SDM states its identical
to SNB with exception of the exact event tables, but a quick look
suggests they're similar enough.

Also mark SNB-EP as broken for now.

Requested-and-tested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2312c1ff1b19..187c294bc658 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1909,8 +1909,9 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 42: /* SandyBridge */
-		x86_add_quirk(intel_sandybridge_quirk);
 	case 45: /* SandyBridge, "Romely-EP" */
+		x86_add_quirk(intel_sandybridge_quirk);
+	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-- 
cgit v1.2.3


From 212d95dfdb66e5c81879b08e4f7fbfc8498b1ab5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 5 Jun 2012 10:26:43 +0200
Subject: perf/x86: Update SNB PEBS constraints

Afaict there's no need to (incompletely) iterate the
MEM_UOPS_RETIRED.* umask state.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5a3edc27f6e5..35e2192df9f4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
-	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
-- 
cgit v1.2.3


From 1c2ac3fde3e35279958e7b0408e2dcf866465301 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 14 May 2012 15:25:34 +0200
Subject: perf/x86: Fix wrmsrl() debug wrapper

Move the wrmslr() debug wrapper to the common header now that all the
include games are gone. Also clean it up a bit to avoid multiple
evaluation of the argument.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-l4gkfnivwv4yi5mqxjlovymx@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 11 -----------
 arch/x86/kernel/cpu/perf_event.h | 12 ++++++++++++
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c4706cf9c011..43c2017347e7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -35,17 +35,6 @@
 
 #include "perf_event.h"
 
-#if 0
-#undef wrmsrl
-#define wrmsrl(msr, val) 					\
-do {								\
-	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
-			(unsigned long)(val));			\
-	native_write_msr((msr), (u32)((u64)(val)), 		\
-			(u32)((u64)(val) >> 32));		\
-} while (0)
-#endif
-
 struct x86_pmu x86_pmu __read_mostly;
 
 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 7241e2fc3c17..23b5710b1747 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -14,6 +14,18 @@
 
 #include <linux/perf_event.h>
 
+#if 0
+#undef wrmsrl
+#define wrmsrl(msr, val) 						\
+do {									\
+	unsigned int _msr = (msr);					\
+	u64 _val = (val);						\
+	trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr),		\
+			(unsigned long long)(_val));			\
+	native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32));	\
+} while (0)
+#endif
+
 /*
  *          |   NHM/WSM    |      SNB     |
  * register -------------------------------
-- 
cgit v1.2.3


From 1ff4d58a192aea7f245981e2579765f961f6eb9c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 5 Jun 2012 17:56:50 -0700
Subject: x86: Add rdpmcl()

Add a version of rdpmc() that directly reads into a u64

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338944211-28275-4-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/msr.h      | 2 ++
 arch/x86/include/asm/paravirt.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 084ef95274cd..e489c1475be9 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -237,6 +237,8 @@ do {							\
 	(high) = (u32)(_l >> 32);			\
 } while (0)
 
+#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
+
 #define rdtscp(low, high, aux)					\
 do {                                                            \
 	unsigned long long _val = native_read_tscp(&(aux));     \
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6cbbabf52707..14ce05dfe04e 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -252,6 +252,8 @@ do {						\
 	high = _l >> 32;			\
 } while (0)
 
+#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
+
 static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
 {
 	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
-- 
cgit v1.2.3


From c48b60538c3ba05a7a2713c4791b25405525431b Mon Sep 17 00:00:00 2001
From: Vince Weaver <vweaver1@eecs.utk.edu>
Date: Thu, 1 Mar 2012 17:28:14 -0500
Subject: perf/x86: Use rdpmc() rather than rdmsr() when possible in the kernel

The rdpmc instruction is faster than the equivelant rdmsr call,
so use it when possible in the kernel.

The perfctr kernel patches did this, after extensive testing showed
rdpmc to always be faster (One can look in etc/costs in the perfctr-2.6
package to see a historical list of the overhead).

I have done some tests on a 3.2 kernel, the kernel module I used
was included in the first posting of this patch:

                   rdmsr           rdpmc
 Core2 T9900:      203.9 cycles     30.9 cycles
 AMD fam0fh:        56.2 cycles      9.8 cycles
 Atom 6/28/2:      129.7 cycles     50.6 cycles

The speedup of using rdpmc is large.

[ It's probably possible (and desirable) to do this without
  requiring a new field in the hw_perf_event structure, but
  the fixed events make this tricky. ]

Signed-off-by: Vince Weaver <vweaver1@eecs.utk.edu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1203011724030.26934@cl320.eecs.utk.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 4 +++-
 include/linux/perf_event.h       | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 43c2017347e7..000a4746c7ce 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -75,7 +75,7 @@ u64 x86_perf_event_update(struct perf_event *event)
 	 */
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
-	rdmsrl(hwc->event_base, new_raw_count);
+	rdpmcl(hwc->event_base_rdpmc, new_raw_count);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 					new_raw_count) != prev_raw_count)
@@ -819,9 +819,11 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
+		hwc->event_base_rdpmc = (hwc->idx - X86_PMC_IDX_FIXED) | 1<<30;
 	} else {
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
+		hwc->event_base_rdpmc = x86_pmu_addr_offset(hwc->idx);
 	}
 }
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 45db49f64bb4..1ce887abcc5c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -677,6 +677,7 @@ struct hw_perf_event {
 			u64		last_tag;
 			unsigned long	config_base;
 			unsigned long	event_base;
+			int		event_base_rdpmc;
 			int		idx;
 			int		last_cpu;
 
-- 
cgit v1.2.3


From 70ab7003dec58afeae7f5d681dfa309b3a259f03 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 5 Jun 2012 17:56:48 -0700
Subject: perf/x86: Don't assume there can be only 4 PEBS events

On Sandy Bridge in non HT mode there are 8 counters available.
Since every counter can write a PEBS record assuming there are
4 max is incorrect. Use the reported counter number -- with an
upper limit for a static array -- instead.

Also I made the warning messages a bit more informational.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338944211-28275-2-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h          | 3 ++-
 arch/x86/kernel/cpu/perf_event_intel.c    | 2 ++
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 8 ++++----
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 23b5710b1747..3df3de9452a9 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -69,7 +69,7 @@ struct amd_nb {
 };
 
 /* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		4
+#define MAX_PEBS_EVENTS		8
 
 /*
  * A debug store configuration.
@@ -378,6 +378,7 @@ struct x86_pmu {
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
+	int 		max_pebs_events;
 
 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 187c294bc658..e23e71f25264 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1800,6 +1800,8 @@ __init int intel_pmu_init(void)
 	x86_pmu.events_maskl		= ebx.full;
 	x86_pmu.events_mask_len		= eax.split.mask_length;
 
+	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 35e2192df9f4..026373edef7f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -620,7 +620,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ON_ONCE(n > 1);
+	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
 	at += n - 1;
 
 	__intel_pmu_pebs_event(event, iregs, at);
@@ -651,10 +651,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
+	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
 
 	for ( ; at < top; at++) {
-		for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
+		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
 			event = cpuc->events[bit];
 			if (!test_bit(bit, cpuc->active_mask))
 				continue;
@@ -670,7 +670,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			break;
 		}
 
-		if (!event || bit >= MAX_PEBS_EVENTS)
+		if (!event || bit >= x86_pmu.max_pebs_events)
 			continue;
 
 		__intel_pmu_pebs_event(event, iregs, at);
-- 
cgit v1.2.3


From 7fbb98c5cb07563d3ee08714073a8e5452a96be2 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 7 Jun 2012 10:21:21 -0400
Subject: x86: Save cr2 in NMI in case NMIs take a page fault

Avi Kivity reported that page faults in NMIs could cause havic if
the NMI preempted another page fault handler:

   The recent changes to NMI allow exceptions to take place in NMI
   handlers, but I think that a #PF (say, due to access to vmalloc space)
   is still problematic.  Consider the sequence

    #PF  (cr2 set by processor)
      NMI
        ...
        #PF (cr2 clobbered)
          do_page_fault()
          IRET
        ...
        IRET
      do_page_fault()
        address = read_cr2()

   The last line reads the overwritten cr2 value.

Originally I wrote a patch to solve this by saving the cr2 on the stack.
Brian Gerst suggested to save it in the r12 register as both r12 and rbx
are saved by the do_nmi handler as required by the C standard. But rbx
is already used for saving if swapgs needs to be run on exit of the NMI
handler.

Link: http://lkml.kernel.org/r/4FBB8C40.6080304@redhat.com
Link: http://lkml.kernel.org/r/1337763411.13348.140.camel@gandalf.stny.rr.com

Reported-by: Avi Kivity <avi@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Suggested-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/entry_64.S | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7d65133b51be..111f6bbd8b38 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1758,10 +1758,30 @@ end_repeat_nmi:
 	 */
 	call save_paranoid
 	DEFAULT_FRAME 0
+
+	/*
+	 * Save off the CR2 register. If we take a page fault in the NMI then
+	 * it could corrupt the CR2 value. If the NMI preempts a page fault
+	 * handler before it was able to read the CR2 register, and then the
+	 * NMI itself takes a page fault, the page fault that was preempted
+	 * will read the information from the NMI page fault and not the
+	 * origin fault. Save it off and restore it if it changes.
+	 * Use the r12 callee-saved register.
+	 */
+	movq %cr2, %r12
+
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq %rsp,%rdi
 	movq $-1,%rsi
 	call do_nmi
+
+	/* Did the NMI take a page fault? Restore cr2 if it did */
+	movq %cr2, %rcx
+	cmpq %rcx, %r12
+	je 1f
+	movq %r12, %cr2
+1:
+	
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz nmi_restore
 nmi_swapgs:
-- 
cgit v1.2.3


From 1f975f78c84c852e09463a2dfa57e3174e5c719e Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@amd.com>
Date: Fri, 1 Jun 2012 16:52:35 +0200
Subject: x86, pvops: Remove hooks for {rd,wr}msr_safe_regs

There were paravirt_ops hooks for the full register set variant of
{rd,wr}msr_safe which are actually not used by anyone anymore. Remove
them to make the code cleaner and avoid silent breakages when the pvops
members were uninitialized. This has been boot-tested natively and under
Xen with PVOPS enabled and disabled on one machine.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Link: http://lkml.kernel.org/r/1338562358-28182-2-git-send-email-bp@amd64.org
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/msr.h            | 67 +++++++++++++++--------------------
 arch/x86/include/asm/paravirt.h       | 39 --------------------
 arch/x86/include/asm/paravirt_types.h |  2 --
 arch/x86/kernel/paravirt.c            |  2 --
 arch/x86/lib/msr-reg-export.c         |  4 +--
 arch/x86/lib/msr-reg.S                | 10 +++---
 arch/x86/xen/enlighten.c              |  2 --
 7 files changed, 35 insertions(+), 91 deletions(-)

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 084ef95274cd..81860cc012d1 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -115,8 +115,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
 
 extern unsigned long long native_read_tsc(void);
 
-extern int native_rdmsr_safe_regs(u32 regs[8]);
-extern int native_wrmsr_safe_regs(u32 regs[8]);
+extern int rdmsr_safe_regs(u32 regs[8]);
+extern int wrmsr_safe_regs(u32 regs[8]);
 
 static __always_inline unsigned long long __native_read_tsc(void)
 {
@@ -187,43 +187,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
 	return err;
 }
 
-static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
-{
-	u32 gprs[8] = { 0 };
-	int err;
-
-	gprs[1] = msr;
-	gprs[7] = 0x9c5a203a;
-
-	err = native_rdmsr_safe_regs(gprs);
-
-	*p = gprs[0] | ((u64)gprs[2] << 32);
-
-	return err;
-}
-
-static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
-{
-	u32 gprs[8] = { 0 };
-
-	gprs[0] = (u32)val;
-	gprs[1] = msr;
-	gprs[2] = val >> 32;
-	gprs[7] = 0x9c5a203a;
-
-	return native_wrmsr_safe_regs(gprs);
-}
-
-static inline int rdmsr_safe_regs(u32 regs[8])
-{
-	return native_rdmsr_safe_regs(regs);
-}
-
-static inline int wrmsr_safe_regs(u32 regs[8])
-{
-	return native_wrmsr_safe_regs(regs);
-}
-
 #define rdtscl(low)						\
 	((low) = (u32)__native_read_tsc())
 
@@ -248,6 +211,32 @@ do {                                                            \
 
 #endif	/* !CONFIG_PARAVIRT */
 
+static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+{
+	u32 gprs[8] = { 0 };
+	int err;
+
+	gprs[1] = msr;
+	gprs[7] = 0x9c5a203a;
+
+	err = rdmsr_safe_regs(gprs);
+
+	*p = gprs[0] | ((u64)gprs[2] << 32);
+
+	return err;
+}
+
+static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
+{
+	u32 gprs[8] = { 0 };
+
+	gprs[0] = (u32)val;
+	gprs[1] = msr;
+	gprs[2] = val >> 32;
+	gprs[7] = 0x9c5a203a;
+
+	return wrmsr_safe_regs(gprs);
+}
 
 #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val),		\
 					     (u32)((val) >> 32))
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6cbbabf52707..ebb0cdb60a89 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -128,21 +128,11 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err)
 	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
 }
 
-static inline int paravirt_rdmsr_regs(u32 *regs)
-{
-	return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs);
-}
-
 static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
 {
 	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
 }
 
-static inline int paravirt_wrmsr_regs(u32 *regs)
-{
-	return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs);
-}
-
 /* These should all do BUG_ON(_err), but our headers are too tangled. */
 #define rdmsr(msr, val1, val2)			\
 do {						\
@@ -176,9 +166,6 @@ do {						\
 	_err;					\
 })
 
-#define rdmsr_safe_regs(regs)	paravirt_rdmsr_regs(regs)
-#define wrmsr_safe_regs(regs)	paravirt_wrmsr_regs(regs)
-
 static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
 {
 	int err;
@@ -186,32 +173,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
 	*p = paravirt_read_msr(msr, &err);
 	return err;
 }
-static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
-{
-	u32 gprs[8] = { 0 };
-	int err;
-
-	gprs[1] = msr;
-	gprs[7] = 0x9c5a203a;
-
-	err = paravirt_rdmsr_regs(gprs);
-
-	*p = gprs[0] | ((u64)gprs[2] << 32);
-
-	return err;
-}
-
-static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
-{
-	u32 gprs[8] = { 0 };
-
-	gprs[0] = (u32)val;
-	gprs[1] = msr;
-	gprs[2] = val >> 32;
-	gprs[7] = 0x9c5a203a;
-
-	return paravirt_wrmsr_regs(gprs);
-}
 
 static inline u64 paravirt_read_tsc(void)
 {
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8e8b9a4987ee..8613cbb7ba41 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -153,9 +153,7 @@ struct pv_cpu_ops {
 	/* MSR, PMC and TSR operations.
 	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
 	u64 (*read_msr)(unsigned int msr, int *err);
-	int (*rdmsr_regs)(u32 *regs);
 	int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
-	int (*wrmsr_regs)(u32 *regs);
 
 	u64 (*read_tsc)(void);
 	u64 (*read_pmc)(int counter);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 9ce885996fd7..17fff18a1031 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -352,9 +352,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 #endif
 	.wbinvd = native_wbinvd,
 	.read_msr = native_read_msr_safe,
-	.rdmsr_regs = native_rdmsr_safe_regs,
 	.write_msr = native_write_msr_safe,
-	.wrmsr_regs = native_wrmsr_safe_regs,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
 	.read_tscp = native_read_tscp,
diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c
index a311cc59b65d..8d6ef78b5d01 100644
--- a/arch/x86/lib/msr-reg-export.c
+++ b/arch/x86/lib/msr-reg-export.c
@@ -1,5 +1,5 @@
 #include <linux/module.h>
 #include <asm/msr.h>
 
-EXPORT_SYMBOL(native_rdmsr_safe_regs);
-EXPORT_SYMBOL(native_wrmsr_safe_regs);
+EXPORT_SYMBOL(rdmsr_safe_regs);
+EXPORT_SYMBOL(wrmsr_safe_regs);
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index 69fa10623f21..f6d13eefad10 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -6,13 +6,13 @@
 
 #ifdef CONFIG_X86_64
 /*
- * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]);
+ * int {rdmsr,wrmsr}_safe_regs(u32 gprs[8]);
  *
  * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi]
  *
  */
 .macro op_safe_regs op
-ENTRY(native_\op\()_safe_regs)
+ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
 	pushq_cfi %rbx
 	pushq_cfi %rbp
@@ -45,13 +45,13 @@ ENTRY(native_\op\()_safe_regs)
 
 	_ASM_EXTABLE(1b, 3b)
 	CFI_ENDPROC
-ENDPROC(native_\op\()_safe_regs)
+ENDPROC(\op\()_safe_regs)
 .endm
 
 #else /* X86_32 */
 
 .macro op_safe_regs op
-ENTRY(native_\op\()_safe_regs)
+ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
 	pushl_cfi %ebx
 	pushl_cfi %ebp
@@ -92,7 +92,7 @@ ENTRY(native_\op\()_safe_regs)
 
 	_ASM_EXTABLE(1b, 3b)
 	CFI_ENDPROC
-ENDPROC(native_\op\()_safe_regs)
+ENDPROC(\op\()_safe_regs)
 .endm
 
 #endif
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e74df9548a02..60f1131eb94f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1116,9 +1116,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.wbinvd = native_wbinvd,
 
 	.read_msr = native_read_msr_safe,
-	.rdmsr_regs = native_rdmsr_safe_regs,
 	.write_msr = xen_write_msr_safe,
-	.wrmsr_regs = native_wrmsr_safe_regs,
 
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
-- 
cgit v1.2.3


From ecd431d95aa04257e977fd6878e4203ce462e7e9 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 1 Jun 2012 16:52:36 +0200
Subject: x86, cpu: Fix show_msr MSR accessing function

There's no real reason why, when showing the MSRs on a CPU at boottime,
we should be using the AMD-specific variant. Simply use the generic safe
one which handles #GPs just fine.

Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1338562358-28182-3-git-send-email-bp@amd64.org
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6b9333b429ba..5bbc082c47ad 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -947,7 +947,7 @@ static void __cpuinit __print_cpu_msr(void)
 		index_max = msr_range_array[i].max;
 
 		for (index = index_min; index < index_max; index++) {
-			if (rdmsrl_amd_safe(index, &val))
+			if (rdmsrl_safe(index, &val))
 				continue;
 			printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
 		}
-- 
cgit v1.2.3


From 169e9cbd77db23fe50bc8ba68bf081adb67b4220 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@amd.com>
Date: Fri, 1 Jun 2012 16:52:37 +0200
Subject: x86, cpu, amd: Fix crash as Xen Dom0 on AMD Trinity systems

f7f286a910221 ("x86/amd: Re-enable CPU topology extensions in case BIOS
has disabled it") wrongfully added code which used the AMD-specific
{rd,wr}msr variants for no real reason.

This caused boot panics on xen which wasn't initializing the
{rd,wr}msr_safe_regs pv_ops members properly.

This, in turn, caused a heated discussion leading to us reviewing all
uses of the AMD-specific variants and removing them where unneeded
(almost everywhere except an obscure K8 BIOS fix, see 6b0f43ddfa358).

Finally, this patch switches to the standard {rd,wr}msr*_safe* variants
which should've been used in the first place anyway and avoided unneeded
excitation with xen.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Link: http://lkml.kernel.org/r/1338562358-28182-4-git-send-email-bp@amd64.org
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Link: <http://lkml.kernel.org/r/1338383402-3838-1-git-send-email-andre.przywara@amd.com>
[Boris: correct and expand commit message]
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/amd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 146bb6218eec..80ccd99542e6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -586,9 +586,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {
 		u64 val;
 
-		if (!rdmsrl_amd_safe(0xc0011005, &val)) {
+		if (!rdmsrl_safe(0xc0011005, &val)) {
 			val |= 1ULL << 54;
-			wrmsrl_amd_safe(0xc0011005, val);
+			checking_wrmsrl(0xc0011005, val);
 			rdmsrl(0xc0011005, val);
 			if (val & (1ULL << 54)) {
 				set_cpu_cap(c, X86_FEATURE_TOPOEXT);
-- 
cgit v1.2.3


From 2c929ce6f1ed1302be225512b433e6a6554f71a4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 1 Jun 2012 16:52:38 +0200
Subject: x86, cpu, amd: Deprecate AMD-specific MSR variants

Now that all users of {rd,wr}msr_amd_safe have been fixed, deprecate its
use by making them private to amd.c and adding warnings when used on
anything else beside K8.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1338562358-28182-5-git-send-email-bp@amd64.org
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/msr.h | 27 ---------------------------
 arch/x86/kernel/cpu/amd.c  | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 81860cc012d1..cb33b5f00267 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -211,33 +211,6 @@ do {                                                            \
 
 #endif	/* !CONFIG_PARAVIRT */
 
-static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
-{
-	u32 gprs[8] = { 0 };
-	int err;
-
-	gprs[1] = msr;
-	gprs[7] = 0x9c5a203a;
-
-	err = rdmsr_safe_regs(gprs);
-
-	*p = gprs[0] | ((u64)gprs[2] << 32);
-
-	return err;
-}
-
-static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
-{
-	u32 gprs[8] = { 0 };
-
-	gprs[0] = (u32)val;
-	gprs[1] = msr;
-	gprs[2] = val >> 32;
-	gprs[7] = 0x9c5a203a;
-
-	return wrmsr_safe_regs(gprs);
-}
-
 #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val),		\
 					     (u32)((val) >> 32))
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 80ccd99542e6..c928eb26ada6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -19,6 +19,39 @@
 
 #include "cpu.h"
 
+static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+{
+	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+	u32 gprs[8] = { 0 };
+	int err;
+
+	WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__);
+
+	gprs[1] = msr;
+	gprs[7] = 0x9c5a203a;
+
+	err = rdmsr_safe_regs(gprs);
+
+	*p = gprs[0] | ((u64)gprs[2] << 32);
+
+	return err;
+}
+
+static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
+{
+	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+	u32 gprs[8] = { 0 };
+
+	WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__);
+
+	gprs[0] = (u32)val;
+	gprs[1] = msr;
+	gprs[2] = val >> 32;
+	gprs[7] = 0x9c5a203a;
+
+	return wrmsr_safe_regs(gprs);
+}
+
 #ifdef CONFIG_X86_32
 /*
  *	B step AMD K6 before B 9730xxxx have hardware bugs that can cause
-- 
cgit v1.2.3


From 715c85b1fc824e9cd0ea07d6ceb80d2262f32e90 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 7 Jun 2012 13:32:04 -0700
Subject: x86, cpu: Rename checking_wrmsrl() to wrmsrl_safe()

Rename checking_wrmsrl() to wrmsrl_safe(), to match the naming
convention used by all the other MSR access functions/macros.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/msr.h             |  2 +-
 arch/x86/kernel/cpu/amd.c              |  4 ++--
 arch/x86/kernel/cpu/perf_event.c       |  2 +-
 arch/x86/kernel/cpu/perf_event_intel.c |  6 +++---
 arch/x86/kernel/cpu/perf_event_p4.c    | 14 +++++++-------
 arch/x86/kernel/cpu/perf_event_p6.c    |  4 ++--
 arch/x86/kernel/process_64.c           |  4 ++--
 arch/x86/vdso/vdso32-setup.c           |  6 +++---
 8 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index cb33b5f00267..fe83d74a920d 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -211,7 +211,7 @@ do {                                                            \
 
 #endif	/* !CONFIG_PARAVIRT */
 
-#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val),		\
+#define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val),		\
 					     (u32)((val) >> 32))
 
 #define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2))
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c928eb26ada6..9d92e19039f0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -621,7 +621,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 		if (!rdmsrl_safe(0xc0011005, &val)) {
 			val |= 1ULL << 54;
-			checking_wrmsrl(0xc0011005, val);
+			wrmsrl_safe(0xc0011005, val);
 			rdmsrl(0xc0011005, val);
 			if (val & (1ULL << 54)) {
 				set_cpu_cap(c, X86_FEATURE_TOPOEXT);
@@ -712,7 +712,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask);
 		if (err == 0) {
 			mask |= (1 << 10);
-			checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+			wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask);
 		}
 	}
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da0183..4e3ba9cb5a4e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -222,7 +222,7 @@ static bool check_hw_exists(void)
 	 * that don't trap on the MSR access and always return 0s.
 	 */
 	val = 0xabcdUL;
-	ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
+	ret = wrmsrl_safe(x86_pmu_event_addr(0), val);
 	ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
 	if (ret || val != val_new)
 		goto msr_fail;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..7789aa37c746 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1003,11 +1003,11 @@ static void intel_pmu_reset(void)
 	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
-		checking_wrmsrl(x86_pmu_event_addr(idx),  0ull);
+		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
+		wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
 	}
 	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
-		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
 
 	if (ds)
 		ds->bts_index = ds->bts_buffer_base;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 47124a73dd73..6c82e4037989 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void)
 	 * So at moment let leave metrics turned on forever -- it's
 	 * ok for now but need to be revisited!
 	 *
-	 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
-	 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+	 * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0);
+	 * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
 	 */
 }
 
@@ -909,7 +909,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
 	 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
 	 * asserted again and again
 	 */
-	(void)checking_wrmsrl(hwc->config_base,
+	(void)wrmsrl_safe(hwc->config_base,
 		(u64)(p4_config_unpack_cccr(hwc->config)) &
 			~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
 }
@@ -943,8 +943,8 @@ static void p4_pmu_enable_pebs(u64 config)
 
 	bind = &p4_pebs_bind_map[idx];
 
-	(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE,	(u64)bind->metric_pebs);
-	(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT,	(u64)bind->metric_vert);
+	(void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE,	(u64)bind->metric_pebs);
+	(void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT,	(u64)bind->metric_vert);
 }
 
 static void p4_pmu_enable_event(struct perf_event *event)
@@ -978,8 +978,8 @@ static void p4_pmu_enable_event(struct perf_event *event)
 	 */
 	p4_pmu_enable_pebs(hwc->config);
 
-	(void)checking_wrmsrl(escr_addr, escr_conf);
-	(void)checking_wrmsrl(hwc->config_base,
+	(void)wrmsrl_safe(escr_addr, escr_conf);
+	(void)wrmsrl_safe(hwc->config_base,
 				(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 32bcfc7dd230..e4dd0f7a0453 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -71,7 +71,7 @@ p6_pmu_disable_event(struct perf_event *event)
 	if (cpuc->enabled)
 		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 
-	(void)checking_wrmsrl(hwc->config_base, val);
+	(void)wrmsrl_safe(hwc->config_base, val);
 }
 
 static void p6_pmu_enable_event(struct perf_event *event)
@@ -84,7 +84,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
 	if (cpuc->enabled)
 		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 
-	(void)checking_wrmsrl(hwc->config_base, val);
+	(void)wrmsrl_safe(hwc->config_base, val);
 }
 
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 61cdf7fdf099..3e215ba68766 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -466,7 +466,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 			task->thread.gs = addr;
 			if (doit) {
 				load_gs_index(0);
-				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
+				ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
 			}
 		}
 		put_cpu();
@@ -494,7 +494,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 				/* set the selector to 0 to not confuse
 				   __switch_to */
 				loadsegment(fs, 0);
-				ret = checking_wrmsrl(MSR_FS_BASE, addr);
+				ret = wrmsrl_safe(MSR_FS_BASE, addr);
 			}
 		}
 		put_cpu();
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 66e6d9359826..0faad646f5fd 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -205,9 +205,9 @@ void syscall32_cpu_init(void)
 {
 	/* Load these always in case some future AMD CPU supports
 	   SYSENTER from compat mode too. */
-	checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
-	checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
 
 	wrmsrl(MSR_CSTAR, ia32_cstar_target);
 }
-- 
cgit v1.2.3


From 9d8e10667624ea6411f04495aef1fa4a8a778ee8 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 7 Jun 2012 15:14:49 +0200
Subject: x86/apic: Factor out default vector_allocation_domain() operation

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120607131449.GC4759@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/apic.h          | 21 +++++++++++++++++++++
 arch/x86/kernel/apic/apic_flat_64.c  | 22 +---------------------
 arch/x86/kernel/apic/apic_noop.c     |  3 +--
 arch/x86/kernel/apic/apic_numachip.c |  8 +-------
 arch/x86/kernel/apic/bigsmp_32.c     |  8 +-------
 arch/x86/kernel/apic/es7000_32.c     | 19 ++-----------------
 arch/x86/kernel/apic/numaq_32.c      | 16 +---------------
 arch/x86/kernel/apic/probe_32.c      | 17 +----------------
 arch/x86/kernel/apic/summit_32.c     | 16 +---------------
 arch/x86/kernel/apic/x2apic_phys.c   | 11 +----------
 arch/x86/kernel/apic/x2apic_uv_x.c   |  8 +-------
 11 files changed, 32 insertions(+), 117 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index bef571769e68..feb2dbdae9ec 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -615,6 +615,27 @@ extern unsigned int
 default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			       const struct cpumask *andmask);
 
+static inline void
+flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt desitination.
+	 */
+	cpumask_clear(retmask);
+	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
+
+static inline void
+default_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+	cpumask_copy(retmask, cpumask_of(cpu));
+}
+
 static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
 {
 	return physid_isset(apicid, *map);
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 55b97ce4fa19..bddc92566d0a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -36,20 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
 
-static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/* Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 /*
  * Set up the logical destination ID.
  *
@@ -257,12 +243,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 {
 	default_send_IPI_mask_sequence_phys(cpumask, vector);
@@ -309,7 +289,7 @@ static struct apic apic_physflat =  {
 	.check_apicid_used		= NULL,
 	.check_apicid_present		= NULL,
 
-	.vector_allocation_domain	= physflat_vector_allocation_domain,
+	.vector_allocation_domain	= default_vector_allocation_domain,
 	/* not needed, but shouldn't hurt: */
 	.init_apic_ldr			= flat_init_apic_ldr,
 
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 7c3dd4fe0686..3e43cf528939 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -104,8 +104,7 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	if (cpu != 0)
 		pr_warning("APIC: Vector allocated for non-BSP cpu\n");
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
+	cpumask_copy(retmask, cpumask_of(cpu));
 }
 
 static u32 noop_apic_read(u32 reg)
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index dba4bf2ed566..c028132ad358 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -72,12 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
 	return initial_apic_id >> index_msb;
 }
 
-static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
 	union numachip_csr_g3_ext_irq_gen int_gen;
@@ -222,7 +216,7 @@ static struct apic apic_numachip __refconst = {
 	.check_apicid_used		= NULL,
 	.check_apicid_present		= NULL,
 
-	.vector_allocation_domain	= numachip_vector_allocation_domain,
+	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= flat_init_apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 907aa3d112a6..df342fe4d6aa 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -142,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
 	{ } /* NULL entry stops DMI scanning */
 };
 
-static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static int probe_bigsmp(void)
 {
 	if (def_to_bigsmp)
@@ -176,7 +170,7 @@ static struct apic apic_bigsmp = {
 	.check_apicid_used		= bigsmp_check_apicid_used,
 	.check_apicid_present		= bigsmp_check_apicid_present,
 
-	.vector_allocation_domain	= bigsmp_vector_allocation_domain,
+	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= bigsmp_init_apic_ldr,
 
 	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index db4ab1be3c79..3c42865757e2 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -394,21 +394,6 @@ static void es7000_enable_apic_mode(void)
 		WARN(1, "Command failed, status = %x\n", mip_status);
 }
 
-static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/* Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
-
 static void es7000_wait_for_init_deassert(atomic_t *deassert)
 {
 	while (!atomic_read(deassert))
@@ -638,7 +623,7 @@ static struct apic __refdata apic_es7000_cluster = {
 	.check_apicid_used		= es7000_check_apicid_used,
 	.check_apicid_present		= es7000_check_apicid_present,
 
-	.vector_allocation_domain	= es7000_vector_allocation_domain,
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= es7000_init_apic_ldr_cluster,
 
 	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
@@ -705,7 +690,7 @@ static struct apic __refdata apic_es7000 = {
 	.check_apicid_used		= es7000_check_apicid_used,
 	.check_apicid_present		= es7000_check_apicid_present,
 
-	.vector_allocation_domain	= es7000_vector_allocation_domain,
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= es7000_init_apic_ldr,
 
 	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index f00a68cca37a..eb2d466fd81a 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -441,20 +441,6 @@ static int probe_numaq(void)
 	return found_numaq;
 }
 
-static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/* Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 static void numaq_setup_portio_remap(void)
 {
 	int num_quads = num_online_nodes();
@@ -491,7 +477,7 @@ static struct apic __refdata apic_numaq = {
 	.check_apicid_used		= numaq_check_apicid_used,
 	.check_apicid_present		= numaq_check_apicid_present,
 
-	.vector_allocation_domain	= numaq_vector_allocation_domain,
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= numaq_init_apic_ldr,
 
 	.ioapic_phys_id_map		= numaq_ioapic_phys_id_map,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 71b6ac48ab26..2c6f003b2e4b 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -66,21 +66,6 @@ static void setup_apic_flat_routing(void)
 #endif
 }
 
-static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/*
-	 * Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 /* should be called last. */
 static int probe_default(void)
 {
@@ -105,7 +90,7 @@ static struct apic apic_default = {
 	.check_apicid_used		= default_check_apicid_used,
 	.check_apicid_present		= default_check_apicid_present,
 
-	.vector_allocation_domain	= default_vector_allocation_domain,
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= default_init_apic_ldr,
 
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 659897c00755..35d254c1fec2 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -320,20 +320,6 @@ static int probe_summit(void)
 	return 0;
 }
 
-static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/* Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 #ifdef CONFIG_X86_SUMMIT_NUMA
 static struct rio_table_hdr *rio_table_hdr;
 static struct scal_detail   *scal_devs[MAX_NUMNODES];
@@ -509,7 +495,7 @@ static struct apic apic_summit = {
 	.check_apicid_used		= summit_check_apicid_used,
 	.check_apicid_present		= summit_check_apicid_present,
 
-	.vector_allocation_domain	= summit_vector_allocation_domain,
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= summit_init_apic_ldr,
 
 	.ioapic_phys_id_map		= summit_ioapic_phys_id_map,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f730269edef2..f109388a0e80 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -88,15 +88,6 @@ static int x2apic_phys_probe(void)
 	return apic == &apic_x2apic_phys;
 }
 
-/*
- * Each logical cpu is in its own vector allocation domain.
- */
-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static struct apic apic_x2apic_phys = {
 
 	.name				= "physical x2apic",
@@ -114,7 +105,7 @@ static struct apic apic_x2apic_phys = {
 	.check_apicid_used		= NULL,
 	.check_apicid_present		= NULL,
 
-	.vector_allocation_domain	= x2apic_vector_allocation_domain,
+	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= init_x2apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 16efb92bfea5..df89a7d78748 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -185,12 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
 unsigned long sn_rtc_cycles_per_second;
 EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
-static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
 #ifdef CONFIG_SMP
@@ -363,7 +357,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
 	.check_apicid_used		= NULL,
 	.check_apicid_present		= NULL,
 
-	.vector_allocation_domain	= uv_vector_allocation_domain,
+	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= uv_init_apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
-- 
cgit v1.2.3


From 1bccd58bfffc5a677051937b332b71f0686187c1 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 7 Jun 2012 15:15:15 +0200
Subject: x86/apic: Try to spread IRQ vectors to different priority levels

When assigning a new vector it is primarially done by adding 8
to the previously given out vector number. Hence, two
consequently allocated vector numbers would likely fall into the
same priority level. Try to spread vector numbers to different
priority levels better by changing the step from 8 to 16.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120607131514.GD4759@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/io_apic.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 74c569791e75..05af3d341aaa 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1112,7 +1112,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
-	static int current_offset = VECTOR_OFFSET_START % 8;
+	static int current_offset = VECTOR_OFFSET_START % 16;
 	unsigned int old_vector;
 	int cpu, err;
 	cpumask_var_t tmp_mask;
@@ -1148,10 +1148,9 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 		vector = current_vector;
 		offset = current_offset;
 next:
-		vector += 8;
+		vector += 16;
 		if (vector >= first_system_vector) {
-			/* If out of vectors on large boxen, must share them. */
-			offset = (offset + 1) % 8;
+			offset = (offset + 1) % 16;
 			vector = FIRST_EXTERNAL_VECTOR + offset;
 		}
 		if (unlikely(current_vector == vector))
-- 
cgit v1.2.3


From 8637e38aff14d048b649075114023023a2e80fba Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 7 Jun 2012 15:15:44 +0200
Subject: x86/apic: Avoid useless scanning thru a cpumask in
 assign_irq_vector()

In case of static vector allocation domains (i.e. flat) if all
vector numbers are exhausted, an attempt to assign a new vector
will lead to useless scans through all CPUs in the cpumask, even
though it is known that each new pass would fail. Make this
corner case less painful by letting report whether the vector
allocation domain depends on passed arguments or not and stop
scanning early.

The same could have been achived by introducing a static flag to
the apic operations. But let's allow vector_allocation_domain()
have more intelligence here and decide dynamically, in case we
would need it in the future.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120607131542.GE4759@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/apic.h      |  8 +++++---
 arch/x86/kernel/apic/apic_noop.c |  3 ++-
 arch/x86/kernel/apic/io_apic.c   | 12 +++++++++---
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index feb2dbdae9ec..e3fecd50d5ca 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -306,7 +306,7 @@ struct apic {
 	unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
 	unsigned long (*check_apicid_present)(int apicid);
 
-	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
+	bool (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
 	void (*init_apic_ldr)(void);
 
 	void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
@@ -615,7 +615,7 @@ extern unsigned int
 default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			       const struct cpumask *andmask);
 
-static inline void
+static inline bool
 flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
@@ -628,12 +628,14 @@ flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 	 */
 	cpumask_clear(retmask);
 	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+	return false;
 }
 
-static inline void
+static inline bool
 default_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	cpumask_copy(retmask, cpumask_of(cpu));
+	return true;
 }
 
 static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 3e43cf528939..ac9edf247b15 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,11 +100,12 @@ static unsigned long noop_check_apicid_present(int bit)
 	return physid_isset(bit, phys_cpu_present_map);
 }
 
-static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static bool noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	if (cpu != 0)
 		pr_warning("APIC: Vector allocated for non-BSP cpu\n");
 	cpumask_copy(retmask, cpumask_of(cpu));
+	return true;
 }
 
 static u32 noop_apic_read(u32 reg)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 05af3d341aaa..4061a7dee5c9 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1137,8 +1137,9 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 	for_each_cpu_and(cpu, mask, cpu_online_mask) {
 		int new_cpu;
 		int vector, offset;
+		bool more_domains;
 
-		apic->vector_allocation_domain(cpu, tmp_mask);
+		more_domains = apic->vector_allocation_domain(cpu, tmp_mask);
 
 		if (cpumask_subset(tmp_mask, cfg->domain)) {
 			free_cpumask_var(tmp_mask);
@@ -1153,8 +1154,13 @@ next:
 			offset = (offset + 1) % 16;
 			vector = FIRST_EXTERNAL_VECTOR + offset;
 		}
-		if (unlikely(current_vector == vector))
-			continue;
+
+		if (unlikely(current_vector == vector)) {
+			if (more_domains)
+				continue;
+			else
+				break;
+		}
 
 		if (test_bit(vector, used_vectors))
 			goto next;
-- 
cgit v1.2.3


From ff164324123c0fe181d8de7dadcc7b3fbe25f2cf Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 7 Jun 2012 15:15:59 +0200
Subject: x86/apic: Make cpu_mask_to_apicid() operations return error code

Current cpu_mask_to_apicid() and cpu_mask_to_apicid_and()
implementations have few shortcomings:

1. A value returned by cpu_mask_to_apicid() is written to
hardware registers unconditionally. Should BAD_APICID get ever
returned it will be written to a hardware too. But the value of
BAD_APICID is not universal across all hardware in all modes and
might cause unexpected results, i.e. interrupts might get routed
to CPUs that are not configured to receive it.

2. Because the value of BAD_APICID is not universal it is
counter- intuitive to return it for a hardware where it does not
make sense (i.e. x2apic).

3. cpu_mask_to_apicid_and() operation is thought as an
complement to cpu_mask_to_apicid() that only applies a AND mask
on top of a cpumask being passed. Yet, as consequence of 18374d8
commit the two operations are inconsistent in that of:
  cpu_mask_to_apicid() should not get a offline CPU with the cpumask
  cpu_mask_to_apicid_and() should not fail and return BAD_APICID
These limitations are impossible to realize just from looking at
the operations prototypes.

Most of these shortcomings are resolved by returning a error
code instead of BAD_APICID. As the result, faults are reported
back early rather than possibilities to cause a unexpected
behaviour exist (in case of [1]).

The only exception is setup_timer_IRQ0_pin() routine. Although
obviously controversial to this fix, its existing behaviour is
preserved to not break the fragile check_timer() and would
better addressed in a separate fix.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120607131559.GF4759@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/apic.h           | 44 ++++++++++++------
 arch/x86/kernel/apic/apic.c           | 33 +++++++------
 arch/x86/kernel/apic/es7000_32.c      | 21 +++++----
 arch/x86/kernel/apic/io_apic.c        | 88 +++++++++++++++++++++++------------
 arch/x86/kernel/apic/numaq_32.c       | 14 ++++--
 arch/x86/kernel/apic/summit_32.c      | 22 +++++----
 arch/x86/kernel/apic/x2apic_cluster.c | 24 ++++++----
 arch/x86/kernel/apic/x2apic_uv_x.c    | 27 +++++++----
 arch/x86/platform/uv/uv_irq.c         |  7 ++-
 drivers/iommu/intel_irq_remapping.c   | 13 ++++--
 10 files changed, 188 insertions(+), 105 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index e3fecd50d5ca..ae91f9c7e360 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -331,9 +331,11 @@ struct apic {
 	unsigned long (*set_apic_id)(unsigned int id);
 	unsigned long apic_id_mask;
 
-	unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
-	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
-					       const struct cpumask *andmask);
+	int (*cpu_mask_to_apicid)(const struct cpumask *cpumask,
+				  unsigned int *apicid);
+	int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+				      const struct cpumask *andmask,
+				      unsigned int *apicid);
 
 	/* ipi */
 	void (*send_IPI_mask)(const struct cpumask *mask, int vector);
@@ -591,29 +593,45 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 
 #endif
 
-static inline unsigned int
-flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int
+__flat_cpu_mask_to_apicid(unsigned long cpu_mask, unsigned int *apicid)
 {
-	return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+	cpu_mask &= APIC_ALL_CPUS;
+	if (likely(cpu_mask)) {
+		*apicid = (unsigned int)cpu_mask;
+		return 0;
+	} else {
+		return -EINVAL;
+	}
 }
 
-static inline unsigned int
+static inline int
+flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
+			unsigned int *apicid)
+{
+	return __flat_cpu_mask_to_apicid(cpumask_bits(cpumask)[0], apicid);
+}
+
+static inline int
 flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			    const struct cpumask *andmask)
+			    const struct cpumask *andmask,
+			    unsigned int *apicid)
 {
 	unsigned long mask1 = cpumask_bits(cpumask)[0];
 	unsigned long mask2 = cpumask_bits(andmask)[0];
 	unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
 
-	return (unsigned int)(mask1 & mask2 & mask3);
+	return __flat_cpu_mask_to_apicid(mask1 & mask2 & mask3, apicid);
 }
 
-extern unsigned int
-default_cpu_mask_to_apicid(const struct cpumask *cpumask);
+extern int
+default_cpu_mask_to_apicid(const struct cpumask *cpumask,
+			   unsigned int *apicid);
 
-extern unsigned int
+extern int
 default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			       const struct cpumask *andmask);
+			       const struct cpumask *andmask,
+			       unsigned int *apicid);
 
 static inline bool
 flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 96a2608252f1..b8d92606f84f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2123,24 +2123,26 @@ void default_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-unsigned int default_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int __default_cpu_to_apicid(int cpu, unsigned int *apicid)
 {
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	cpu = cpumask_first(cpumask);
-	if (likely((unsigned)cpu < nr_cpu_ids))
-		return per_cpu(x86_cpu_to_apicid, cpu);
+	if (likely((unsigned int)cpu < nr_cpu_ids)) {
+		*apicid = per_cpu(x86_cpu_to_apicid, cpu);
+		return 0;
+	} else {
+		return -EINVAL;
+	}
+}
 
-	return BAD_APICID;
+int default_cpu_mask_to_apicid(const struct cpumask *cpumask,
+			       unsigned int *apicid)
+{
+	int cpu = cpumask_first(cpumask);
+	return __default_cpu_to_apicid(cpu, apicid);
 }
 
-unsigned int
-default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			       const struct cpumask *andmask)
+int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+				   const struct cpumask *andmask,
+				   unsigned int *apicid)
 {
 	int cpu;
 
@@ -2148,7 +2150,8 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 		if (cpumask_test_cpu(cpu, cpu_online_mask))
 			break;
 	}
-	return per_cpu(x86_cpu_to_apicid, cpu);
+
+	return __default_cpu_to_apicid(cpu, apicid);
 }
 
 /*
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 3c42865757e2..515ebb00a9fc 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -525,7 +525,8 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
 	return 1;
 }
 
-static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static int
+es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 {
 	unsigned int round = 0;
 	int cpu, uninitialized_var(apicid);
@@ -539,31 +540,33 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
 			WARN(1, "Not a valid mask!");
 
-			return BAD_APICID;
+			return -EINVAL;
 		}
 		apicid = new_apicid;
 		round++;
 	}
-	return apicid;
+	*dest_id = apicid;
+	return 0;
 }
 
-static unsigned int
+static int
 es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
-			      const struct cpumask *andmask)
+			      const struct cpumask *andmask,
+			      unsigned int *apicid)
 {
-	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
+	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
-		return apicid;
+		return 0;
 
 	cpumask_and(cpumask, inmask, andmask);
 	cpumask_and(cpumask, cpumask, cpu_online_mask);
-	apicid = es7000_cpu_mask_to_apicid(cpumask);
+	es7000_cpu_mask_to_apicid(cpumask, apicid);
 
 	free_cpumask_var(cpumask);
 
-	return apicid;
+	return 0;
 }
 
 static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4061a7dee5c9..0deb773404e5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1359,7 +1359,14 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
 		return;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(),
+					 &dest)) {
+		pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
+			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
+		__clear_irq_vector(irq, cfg);
+
+		return;
+	}
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1474,6 +1481,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
 					unsigned int pin, int vector)
 {
 	struct IO_APIC_route_entry entry;
+	unsigned int dest;
 
 	if (irq_remapping_enabled)
 		return;
@@ -1484,9 +1492,12 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
 	 * We use logical delivery to get the timer IRQ
 	 * to the first CPU.
 	 */
+	if (unlikely(apic->cpu_mask_to_apicid(apic->target_cpus(), &dest)))
+		dest = BAD_APICID;
+
 	entry.dest_mode = apic->irq_dest_mode;
 	entry.mask = 0;			/* don't mask IRQ for edge */
-	entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
+	entry.dest = dest;
 	entry.delivery_mode = apic->irq_delivery_mode;
 	entry.polarity = 0;
 	entry.trigger = 0;
@@ -2245,16 +2256,25 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 			  unsigned int *dest_id)
 {
 	struct irq_cfg *cfg = data->chip_data;
+	unsigned int irq = data->irq;
+	int err;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
-		return -1;
+		return -EINVAL;
 
-	if (assign_irq_vector(data->irq, data->chip_data, mask))
-		return -1;
+	err = assign_irq_vector(irq, cfg, mask);
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+	if (err) {
+		if (assign_irq_vector(irq, cfg, data->affinity))
+			pr_err("Failed to recover vector for irq %d\n", irq);
+		return err;
+	}
 
 	cpumask_copy(data->affinity, mask);
 
-	*dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
 	return 0;
 }
 
@@ -3040,7 +3060,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
 	if (err)
 		return err;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
 
 	if (irq_remapped(cfg)) {
 		compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
@@ -3361,6 +3384,8 @@ static struct irq_chip ht_irq_chip = {
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
 	struct irq_cfg *cfg;
+	struct ht_irq_msg msg;
+	unsigned dest;
 	int err;
 
 	if (disable_apic)
@@ -3368,36 +3393,37 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 	cfg = irq_cfg(irq);
 	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (!err) {
-		struct ht_irq_msg msg;
-		unsigned dest;
+	if (err)
+		return err;
 
-		dest = apic->cpu_mask_to_apicid_and(cfg->domain,
-						    apic->target_cpus());
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
 
-		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
-		msg.address_lo =
-			HT_IRQ_LOW_BASE |
-			HT_IRQ_LOW_DEST_ID(dest) |
-			HT_IRQ_LOW_VECTOR(cfg->vector) |
-			((apic->irq_dest_mode == 0) ?
-				HT_IRQ_LOW_DM_PHYSICAL :
-				HT_IRQ_LOW_DM_LOGICAL) |
-			HT_IRQ_LOW_RQEOI_EDGE |
-			((apic->irq_delivery_mode != dest_LowestPrio) ?
-				HT_IRQ_LOW_MT_FIXED :
-				HT_IRQ_LOW_MT_ARBITRATED) |
-			HT_IRQ_LOW_IRQ_MASKED;
+	msg.address_lo =
+		HT_IRQ_LOW_BASE |
+		HT_IRQ_LOW_DEST_ID(dest) |
+		HT_IRQ_LOW_VECTOR(cfg->vector) |
+		((apic->irq_dest_mode == 0) ?
+			HT_IRQ_LOW_DM_PHYSICAL :
+			HT_IRQ_LOW_DM_LOGICAL) |
+		HT_IRQ_LOW_RQEOI_EDGE |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			HT_IRQ_LOW_MT_FIXED :
+			HT_IRQ_LOW_MT_ARBITRATED) |
+		HT_IRQ_LOW_IRQ_MASKED;
 
-		write_ht_irq_msg(irq, &msg);
+	write_ht_irq_msg(irq, &msg);
 
-		irq_set_chip_and_handler_name(irq, &ht_irq_chip,
-					      handle_edge_irq, "edge");
+	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
+				      handle_edge_irq, "edge");
 
-		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
-	}
-	return err;
+	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
+
+	return 0;
 }
 #endif /* CONFIG_HT_IRQ */
 
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index eb2d466fd81a..2b55514c328b 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -406,16 +406,20 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid)
  * We use physical apicids here, not logical, so just return the default
  * physical broadcast to stop people from breaking us
  */
-static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static int
+numaq_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid)
 {
-	return 0x0F;
+	*apicid = 0x0F;
+	return 0;
 }
 
-static inline unsigned int
+static int
 numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			     const struct cpumask *andmask)
+			     const struct cpumask *andmask,
+			     unsigned int *apicid)
 {
-	return 0x0F;
+	*apicid = 0x0F;
+	return 0;
 }
 
 /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 35d254c1fec2..5766d84f12d6 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -263,7 +263,8 @@ static int summit_check_phys_apicid_present(int physical_apicid)
 	return 1;
 }
 
-static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static int
+summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 {
 	unsigned int round = 0;
 	int cpu, apicid = 0;
@@ -276,30 +277,33 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
 			printk("%s: Not a valid mask!\n", __func__);
-			return BAD_APICID;
+			return -EINVAL;
 		}
 		apicid |= new_apicid;
 		round++;
 	}
-	return apicid;
+	*dest_id = apicid;
+	return 0;
 }
 
-static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
-			      const struct cpumask *andmask)
+static int
+summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
+			      const struct cpumask *andmask,
+			      unsigned int *apicid)
 {
-	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
+	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
-		return apicid;
+		return 0;
 
 	cpumask_and(cpumask, inmask, andmask);
 	cpumask_and(cpumask, cpumask, cpu_online_mask);
-	apicid = summit_cpu_mask_to_apicid(cpumask);
+	summit_cpu_mask_to_apicid(cpumask, apicid);
 
 	free_cpumask_var(cpumask);
 
-	return apicid;
+	return 0;
 }
 
 /*
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 612622c47dfb..5f86f79335f4 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -96,24 +96,26 @@ static void x2apic_send_IPI_all(int vector)
 	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static int
+x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid)
 {
 	int cpu = cpumask_first(cpumask);
-	u32 dest = 0;
 	int i;
 
-	if (cpu > nr_cpu_ids)
-		return BAD_APICID;
+	if (cpu >= nr_cpu_ids)
+		return -EINVAL;
 
+	*apicid = 0;
 	for_each_cpu_and(i, cpumask, per_cpu(cpus_in_cluster, cpu))
-		dest |= per_cpu(x86_cpu_to_logical_apicid, i);
+		*apicid |= per_cpu(x86_cpu_to_logical_apicid, i);
 
-	return dest;
+	return 0;
 }
 
-static unsigned int
+static int
 x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			      const struct cpumask *andmask)
+			      const struct cpumask *andmask,
+			      unsigned int *apicid)
 {
 	u32 dest = 0;
 	u16 cluster;
@@ -128,7 +130,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	}
 
 	if (!dest)
-		return BAD_APICID;
+		return -EINVAL;
 
 	for_each_cpu_and(i, cpumask, andmask) {
 		if (!cpumask_test_cpu(i, cpu_online_mask))
@@ -138,7 +140,9 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 		dest |= per_cpu(x86_cpu_to_logical_apicid, i);
 	}
 
-	return dest;
+	*apicid = dest;
+
+	return 0;
 }
 
 static void init_x2apic_ldr(void)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index df89a7d78748..2f3030fef31e 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -269,23 +269,31 @@ static void uv_init_apic_ldr(void)
 {
 }
 
-static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int __uv_cpu_to_apicid(int cpu, unsigned int *apicid)
+{
+	if (likely((unsigned int)cpu < nr_cpu_ids)) {
+		*apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
+		return 0;
+	} else {
+		return -EINVAL;
+	}
+}
+
+static int
+uv_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid)
 {
 	/*
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
 	int cpu = cpumask_first(cpumask);
-
-	if ((unsigned)cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
-	else
-		return BAD_APICID;
+	return __uv_cpu_to_apicid(cpu, apicid);
 }
 
-static unsigned int
+static int
 uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			  const struct cpumask *andmask)
+			  const struct cpumask *andmask,
+			  unsigned int *apicid)
 {
 	int cpu;
 
@@ -297,7 +305,8 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 		if (cpumask_test_cpu(cpu, cpu_online_mask))
 			break;
 	}
-	return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
+
+	return __uv_cpu_to_apicid(cpu, apicid);
 }
 
 static unsigned int x2apic_get_apic_id(unsigned long x)
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index f25c2765a5c9..dd1ff39a464c 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -135,6 +135,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode, err;
+	unsigned int dest;
 
 	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
 			sizeof(unsigned long));
@@ -143,6 +144,10 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	if (err != 0)
 		return err;
 
+	err = apic->cpu_mask_to_apicid(eligible_cpu, &dest);
+	if (err != 0)
+		return err;
+
 	if (limit == UV_AFFINITY_CPU)
 		irq_set_status_flags(irq, IRQ_NO_BALANCING);
 	else
@@ -159,7 +164,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	entry->polarity		= 0;
 	entry->trigger		= 0;
 	entry->mask		= 0;
-	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
+	entry->dest		= dest;
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 6d347064b8b0..dafbad06390a 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -924,6 +924,7 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	struct irq_cfg *cfg = data->chip_data;
 	unsigned int dest, irq = data->irq;
 	struct irte irte;
+	int err;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
 		return -EINVAL;
@@ -931,10 +932,16 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	if (get_irte(irq, &irte))
 		return -EBUSY;
 
-	if (assign_irq_vector(irq, cfg, mask))
-		return -EBUSY;
+	err = assign_irq_vector(irq, cfg, mask);
+	if (err)
+		return err;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+	err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest);
+	if (err) {
+		if (assign_irq_vector(irq, cfg, data->affinity));
+			pr_err("Failed to recover vector for irq %d\n", irq);
+		return err;
+	}
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
-- 
cgit v1.2.3


From 4988a40c3981212fa8c64da68722affc1cb6697a Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 7 Jun 2012 15:16:25 +0200
Subject: x86/apic: Make cpu_mask_to_apicid() operations check cpu_online_mask

Currently cpu_mask_to_apicid() should not get a offline CPU with
the cpumask. Otherwise some apic drivers might try to access
non-existent per-cpu variables (i.e. x2apic). In that regard
cpu_mask_to_apicid() and cpu_mask_to_apicid_and() operations are
inconsistent.

This fix makes the two operations do not rely on calling
functions and always return the apicid for only online CPUs. As
result, the meaning and implementations of cpu_mask_to_apicid()
and cpu_mask_to_apicid_and() operations become straight.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120607131624.GG4759@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/apic.h           | 6 ++----
 arch/x86/kernel/apic/apic.c           | 2 +-
 arch/x86/kernel/apic/es7000_32.c      | 3 +--
 arch/x86/kernel/apic/summit_32.c      | 3 +--
 arch/x86/kernel/apic/x2apic_cluster.c | 2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c    | 2 +-
 6 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index ae91f9c7e360..1ed3eead2039 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -596,7 +596,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 static inline int
 __flat_cpu_mask_to_apicid(unsigned long cpu_mask, unsigned int *apicid)
 {
-	cpu_mask &= APIC_ALL_CPUS;
+	cpu_mask = cpu_mask & APIC_ALL_CPUS & cpumask_bits(cpu_online_mask)[0];
 	if (likely(cpu_mask)) {
 		*apicid = (unsigned int)cpu_mask;
 		return 0;
@@ -619,9 +619,7 @@ flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 {
 	unsigned long mask1 = cpumask_bits(cpumask)[0];
 	unsigned long mask2 = cpumask_bits(andmask)[0];
-	unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
-
-	return __flat_cpu_mask_to_apicid(mask1 & mask2 & mask3, apicid);
+	return __flat_cpu_mask_to_apicid(mask1 & mask2, apicid);
 }
 
 extern int
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b8d92606f84f..7e9bbe73bc5a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2136,7 +2136,7 @@ static inline int __default_cpu_to_apicid(int cpu, unsigned int *apicid)
 int default_cpu_mask_to_apicid(const struct cpumask *cpumask,
 			       unsigned int *apicid)
 {
-	int cpu = cpumask_first(cpumask);
+	int cpu = cpumask_first_and(cpumask, cpu_online_mask);
 	return __default_cpu_to_apicid(cpu, apicid);
 }
 
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 515ebb00a9fc..b35cfb9b6962 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -534,7 +534,7 @@ es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 	/*
 	 * The cpus in the mask must all be on the apic cluster.
 	 */
-	for_each_cpu(cpu, cpumask) {
+	for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
 		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
@@ -561,7 +561,6 @@ es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
 		return 0;
 
 	cpumask_and(cpumask, inmask, andmask);
-	cpumask_and(cpumask, cpumask, cpu_online_mask);
 	es7000_cpu_mask_to_apicid(cpumask, apicid);
 
 	free_cpumask_var(cpumask);
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 5766d84f12d6..79d360f6729e 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -272,7 +272,7 @@ summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 	/*
 	 * The cpus in the mask must all be on the apic cluster.
 	 */
-	for_each_cpu(cpu, cpumask) {
+	for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
 		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
@@ -298,7 +298,6 @@ summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
 		return 0;
 
 	cpumask_and(cpumask, inmask, andmask);
-	cpumask_and(cpumask, cpumask, cpu_online_mask);
 	summit_cpu_mask_to_apicid(cpumask, apicid);
 
 	free_cpumask_var(cpumask);
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 5f86f79335f4..23a46cf5b6fd 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -99,7 +99,7 @@ static void x2apic_send_IPI_all(int vector)
 static int
 x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid)
 {
-	int cpu = cpumask_first(cpumask);
+	int cpu = cpumask_first_and(cpumask, cpu_online_mask);
 	int i;
 
 	if (cpu >= nr_cpu_ids)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 2f3030fef31e..307aa076bd62 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -286,7 +286,7 @@ uv_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	int cpu = cpumask_first(cpumask);
+	int cpu = cpumask_first_and(cpumask, cpu_online_mask);
 	return __uv_cpu_to_apicid(cpu, apicid);
 }
 
-- 
cgit v1.2.3


From bf947fcb77ff858f223c49c76e2d130095fa2585 Mon Sep 17 00:00:00 2001
From: Donald Dutile <ddutile@redhat.com>
Date: Mon, 4 Jun 2012 17:29:01 -0400
Subject: iommu/dmar: Replace printks with appropriate pr_*()

Just some cleanup so next patch can keep the info printing
the same way throughout the file.

Replace printk(KERN_*  with pr_*() functions.

Signed-off-by: Donald Dutile <ddutile@redhat.com>
Cc: iommu@lists.linux-foundation.org
Cc: chrisw@redhat.com
Cc: suresh.b.siddha@intel.com
Cc: dwmw2@infradead.org
Link: http://lkml.kernel.org/r/1338845342-12464-2-git-send-email-ddutile@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/iommu/dmar.c | 83 ++++++++++++++++++++++------------------------------
 1 file changed, 35 insertions(+), 48 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 3a74e4410fc0..1e5a10de3471 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -83,15 +83,14 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
 		 * ignore it
 		 */
 		if (!bus) {
-			printk(KERN_WARNING
-			PREFIX "Device scope bus [%d] not found\n",
-			scope->bus);
+			pr_warn(PREFIX "Device scope bus [%d] not found\n",
+				scope->bus);
 			break;
 		}
 		pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
 		if (!pdev) {
-			printk(KERN_WARNING PREFIX
-			"Device scope device [%04x:%02x:%02x.%02x] not found\n",
+			pr_warn(PREFIX "Device scope device"
+				"[%04x:%02x:%02x.%02x] not found\n",
 				segment, bus->number, path->dev, path->fn);
 			break;
 		}
@@ -100,9 +99,9 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
 		bus = pdev->subordinate;
 	}
 	if (!pdev) {
-		printk(KERN_WARNING PREFIX
-		"Device scope device [%04x:%02x:%02x.%02x] not found\n",
-		segment, scope->bus, path->dev, path->fn);
+		pr_warn(PREFIX
+			"Device scope device [%04x:%02x:%02x.%02x] not found\n",
+			segment, scope->bus, path->dev, path->fn);
 		*dev = NULL;
 		return 0;
 	}
@@ -110,8 +109,7 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
 			pdev->subordinate) || (scope->entry_type == \
 			ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
 		pci_dev_put(pdev);
-		printk(KERN_WARNING PREFIX
-			"Device scope type does not match for %s\n",
+		pr_warn(PREFIX "Device scope type does not match for %s\n",
 			 pci_name(pdev));
 		return -EINVAL;
 	}
@@ -134,8 +132,7 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
 			(*cnt)++;
 		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
-			printk(KERN_WARNING PREFIX
-			       "Unsupported device scope\n");
+			pr_warn(PREFIX "Unsupported device scope\n");
 		}
 		start += scope->length;
 	}
@@ -261,25 +258,23 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
 		drhd = container_of(header, struct acpi_dmar_hardware_unit,
 				    header);
-		printk (KERN_INFO PREFIX
-			"DRHD base: %#016Lx flags: %#x\n",
+		pr_info(PREFIX "DRHD base: %#016Lx flags: %#x\n",
 			(unsigned long long)drhd->address, drhd->flags);
 		break;
 	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
 		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
 				    header);
-		printk (KERN_INFO PREFIX
-			"RMRR base: %#016Lx end: %#016Lx\n",
+		pr_info(PREFIX "RMRR base: %#016Lx end: %#016Lx\n",
 			(unsigned long long)rmrr->base_address,
 			(unsigned long long)rmrr->end_address);
 		break;
 	case ACPI_DMAR_TYPE_ATSR:
 		atsr = container_of(header, struct acpi_dmar_atsr, header);
-		printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
+		pr_info(PREFIX "ATSR flags: %#x\n", atsr->flags);
 		break;
 	case ACPI_DMAR_HARDWARE_AFFINITY:
 		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
-		printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
+		pr_info(PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
 		       (unsigned long long)rhsa->base_address,
 		       rhsa->proximity_domain);
 		break;
@@ -299,7 +294,7 @@ static int __init dmar_table_detect(void)
 				&dmar_tbl_size);
 
 	if (ACPI_SUCCESS(status) && !dmar_tbl) {
-		printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
+		pr_warn(PREFIX "Unable to map DMAR\n");
 		status = AE_NOT_FOUND;
 	}
 
@@ -333,20 +328,18 @@ parse_dmar_table(void)
 		return -ENODEV;
 
 	if (dmar->width < PAGE_SHIFT - 1) {
-		printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
+		pr_warn(PREFIX "Invalid DMAR haw\n");
 		return -EINVAL;
 	}
 
-	printk (KERN_INFO PREFIX "Host address width %d\n",
-		dmar->width + 1);
+	pr_info(PREFIX "Host address width %d\n", dmar->width + 1);
 
 	entry_header = (struct acpi_dmar_header *)(dmar + 1);
 	while (((unsigned long)entry_header) <
 			(((unsigned long)dmar) + dmar_tbl->length)) {
 		/* Avoid looping forever on bad ACPI tables */
 		if (entry_header->length == 0) {
-			printk(KERN_WARNING PREFIX
-				"Invalid 0-length structure\n");
+			pr_warn(PREFIX "Invalid 0-length structure\n");
 			ret = -EINVAL;
 			break;
 		}
@@ -369,8 +362,7 @@ parse_dmar_table(void)
 #endif
 			break;
 		default:
-			printk(KERN_WARNING PREFIX
-				"Unknown DMAR structure type %d\n",
+			pr_warn(PREFIX "Unknown DMAR structure type %d\n",
 				entry_header->type);
 			ret = 0; /* for forward compatibility */
 			break;
@@ -469,12 +461,12 @@ int __init dmar_table_init(void)
 	ret = parse_dmar_table();
 	if (ret) {
 		if (ret != -ENODEV)
-			printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
+			pr_info(PREFIX "parse DMAR table failure.\n");
 		return ret;
 	}
 
 	if (list_empty(&dmar_drhd_units)) {
-		printk(KERN_INFO PREFIX "No DMAR devices found\n");
+		pr_info(PREFIX "No DMAR devices found\n");
 		return -ENODEV;
 	}
 
@@ -506,8 +498,7 @@ int __init check_zero_address(void)
 			(((unsigned long)dmar) + dmar_tbl->length)) {
 		/* Avoid looping forever on bad ACPI tables */
 		if (entry_header->length == 0) {
-			printk(KERN_WARNING PREFIX
-				"Invalid 0-length structure\n");
+			pr_warn(PREFIX "Invalid 0-length structure\n");
 			return 0;
 		}
 
@@ -558,8 +549,8 @@ int __init detect_intel_iommu(void)
 
 		if (ret && irq_remapping_enabled && cpu_has_x2apic &&
 		    dmar->flags & 0x1)
-			printk(KERN_INFO
-			       "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
+			pr_info("Queued invalidation will be enabled to "
+				"support x2apic and Intr-remapping.\n");
 
 		if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
 			iommu_detected = 1;
@@ -602,7 +593,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 
 	iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
 	if (!iommu->reg) {
-		printk(KERN_ERR "IOMMU: can't map the region\n");
+		pr_err("IOMMU: can't map the region\n");
 		goto error;
 	}
 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
@@ -615,15 +606,13 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 
 	agaw = iommu_calculate_agaw(iommu);
 	if (agaw < 0) {
-		printk(KERN_ERR
-		       "Cannot get a valid agaw for iommu (seq_id = %d)\n",
-		       iommu->seq_id);
+		pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
+			iommu->seq_id);
 		goto err_unmap;
 	}
 	msagaw = iommu_calculate_max_sagaw(iommu);
 	if (msagaw < 0) {
-		printk(KERN_ERR
-			"Cannot get a valid max agaw for iommu (seq_id = %d)\n",
+		pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
 			iommu->seq_id);
 		goto err_unmap;
 	}
@@ -640,7 +629,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 		iounmap(iommu->reg);
 		iommu->reg = ioremap(drhd->reg_base_addr, map_size);
 		if (!iommu->reg) {
-			printk(KERN_ERR "IOMMU: can't map the region\n");
+			pr_err("IOMMU: can't map the region\n");
 			goto error;
 		}
 	}
@@ -710,7 +699,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
 	if (fault & DMA_FSTS_IQE) {
 		head = readl(iommu->reg + DMAR_IQH_REG);
 		if ((head >> DMAR_IQ_SHIFT) == index) {
-			printk(KERN_ERR "VT-d detected invalid descriptor: "
+			pr_err("VT-d detected invalid descriptor: "
 				"low=%llx, high=%llx\n",
 				(unsigned long long)qi->desc[index].low,
 				(unsigned long long)qi->desc[index].high);
@@ -1129,15 +1118,14 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
 
 	if (fault_type == INTR_REMAP)
-		printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
+		pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] "
 		       "fault index %llx\n"
 			"INTR-REMAP:[fault reason %02d] %s\n",
 			(source_id >> 8), PCI_SLOT(source_id & 0xFF),
 			PCI_FUNC(source_id & 0xFF), addr >> 48,
 			fault_reason, reason);
 	else
-		printk(KERN_ERR
-		       "DMAR:[%s] Request device [%02x:%02x.%d] "
+		pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
 		       "fault addr %llx \n"
 		       "DMAR:[fault reason %02d] %s\n",
 		       (type ? "DMA Read" : "DMA Write"),
@@ -1157,8 +1145,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
 	if (fault_status)
-		printk(KERN_ERR "DRHD: handling fault status reg %x\n",
-		       fault_status);
+		pr_err("DRHD: handling fault status reg %x\n", fault_status);
 
 	/* TBD: ignore advanced fault log currently */
 	if (!(fault_status & DMA_FSTS_PPF))
@@ -1224,7 +1211,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
 
 	irq = create_irq();
 	if (!irq) {
-		printk(KERN_ERR "IOMMU: no free vectors\n");
+		pr_err("IOMMU: no free vectors\n");
 		return -EINVAL;
 	}
 
@@ -1241,7 +1228,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
 
 	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
 	if (ret)
-		printk(KERN_ERR "IOMMU: can't request irq\n");
+		pr_err("IOMMU: can't request irq\n");
 	return ret;
 }
 
@@ -1258,7 +1245,7 @@ int __init enable_drhd_fault_handling(void)
 		ret = dmar_set_interrupt(iommu);
 
 		if (ret) {
-			printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
+			pr_err("DRHD %Lx: failed to enable fault, "
 			       " interrupt, ret %d\n",
 			       (unsigned long long)drhd->reg_base_addr, ret);
 			return -1;
-- 
cgit v1.2.3


From 6f5cf52114dd87f9ed091678f7dfc8ff21bbe2b3 Mon Sep 17 00:00:00 2001
From: Donald Dutile <ddutile@redhat.com>
Date: Mon, 4 Jun 2012 17:29:02 -0400
Subject: iommu/dmar: Reserve mmio space used by the IOMMU, if the BIOS forgets
 to

Intel-iommu initialization doesn't currently reserve the memory
used for the IOMMU registers. This can allow the pci resource
allocator to assign a device BAR to the same address as the
IOMMU registers. This can cause some not so nice side affects
when the driver ioremap's that region.

Introduced two helper functions to map & unmap the IOMMU
registers as well as simplify the init and exit paths.

Signed-off-by: Donald Dutile <ddutile@redhat.com>
Acked-by: Chris Wright <chrisw@redhat.com>
Cc: iommu@lists.linux-foundation.org
Cc: suresh.b.siddha@intel.com
Cc: dwmw2@infradead.org
Link: http://lkml.kernel.org/r/1338845342-12464-3-git-send-email-ddutile@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/iommu/dmar.c        | 111 +++++++++++++++++++++++++++++++++-----------
 include/linux/intel-iommu.h |   2 +
 2 files changed, 86 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 1e5a10de3471..9ab6ebf46f7a 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -570,14 +570,89 @@ int __init detect_intel_iommu(void)
 }
 
 
+static void unmap_iommu(struct intel_iommu *iommu)
+{
+	iounmap(iommu->reg);
+	release_mem_region(iommu->reg_phys, iommu->reg_size);
+}
+
+/**
+ * map_iommu: map the iommu's registers
+ * @iommu: the iommu to map
+ * @phys_addr: the physical address of the base resgister
+ * 
+ * Memory map the iommu's registers.  Start w/ a single page, and
+ * possibly expand if that turns out to be insufficent.  
+ */
+static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
+{
+	int map_size, err=0;
+
+	iommu->reg_phys = phys_addr;
+	iommu->reg_size = VTD_PAGE_SIZE;
+
+	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
+		pr_err("IOMMU: can't reserve memory\n");
+		err = -EBUSY;
+		goto out;
+	}
+
+	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
+	if (!iommu->reg) {
+		pr_err("IOMMU: can't map the region\n");
+		err = -ENOMEM;
+		goto release;
+	}
+
+	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
+	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
+		err = -EINVAL;
+		warn_invalid_dmar(phys_addr, " returns all ones");
+		goto unmap;
+	}
+
+	/* the registers might be more than one page */
+	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+			 cap_max_fault_reg_offset(iommu->cap));
+	map_size = VTD_PAGE_ALIGN(map_size);
+	if (map_size > iommu->reg_size) {
+		iounmap(iommu->reg);
+		release_mem_region(iommu->reg_phys, iommu->reg_size);
+		iommu->reg_size = map_size;
+		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
+					iommu->name)) {
+			pr_err("IOMMU: can't reserve memory\n");
+			err = -EBUSY;
+			goto out;
+		}
+		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
+		if (!iommu->reg) {
+			pr_err("IOMMU: can't map the region\n");
+			err = -ENOMEM;
+			goto release;
+		}
+	}
+	err = 0;
+	goto out;
+
+unmap:
+	iounmap(iommu->reg);
+release:
+	release_mem_region(iommu->reg_phys, iommu->reg_size);
+out:
+	return err;
+}
+
 int alloc_iommu(struct dmar_drhd_unit *drhd)
 {
 	struct intel_iommu *iommu;
-	int map_size;
 	u32 ver;
 	static int iommu_allocated = 0;
 	int agaw = 0;
 	int msagaw = 0;
+	int err;
 
 	if (!drhd->reg_base_addr) {
 		warn_invalid_dmar(0, "");
@@ -591,19 +666,13 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	iommu->seq_id = iommu_allocated++;
 	sprintf (iommu->name, "dmar%d", iommu->seq_id);
 
-	iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
-	if (!iommu->reg) {
-		pr_err("IOMMU: can't map the region\n");
+	err = map_iommu(iommu, drhd->reg_base_addr);
+	if (err) {
+		pr_err("IOMMU: failed to map %s\n", iommu->name);
 		goto error;
 	}
-	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
-	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
-
-	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
-		warn_invalid_dmar(drhd->reg_base_addr, " returns all ones");
-		goto err_unmap;
-	}
 
+	err = -EINVAL;
 	agaw = iommu_calculate_agaw(iommu);
 	if (agaw < 0) {
 		pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
@@ -621,19 +690,6 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 
 	iommu->node = -1;
 
-	/* the registers might be more than one page */
-	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
-		cap_max_fault_reg_offset(iommu->cap));
-	map_size = VTD_PAGE_ALIGN(map_size);
-	if (map_size > VTD_PAGE_SIZE) {
-		iounmap(iommu->reg);
-		iommu->reg = ioremap(drhd->reg_base_addr, map_size);
-		if (!iommu->reg) {
-			pr_err("IOMMU: can't map the region\n");
-			goto error;
-		}
-	}
-
 	ver = readl(iommu->reg + DMAR_VER_REG);
 	pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
 		iommu->seq_id,
@@ -648,10 +704,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	return 0;
 
  err_unmap:
-	iounmap(iommu->reg);
+	unmap_iommu(iommu);
  error:
 	kfree(iommu);
-	return -1;
+	return err;
 }
 
 void free_iommu(struct intel_iommu *iommu)
@@ -662,7 +718,8 @@ void free_iommu(struct intel_iommu *iommu)
 	free_dmar_iommu(iommu);
 
 	if (iommu->reg)
-		iounmap(iommu->reg);
+		unmap_iommu(iommu);
+
 	kfree(iommu);
 }
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e6ca56de9936..78e2ada50cd5 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -308,6 +308,8 @@ enum {
 
 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
+	u64 		reg_phys; /* physical address of hw register set */
+	u64		reg_size; /* size of hw register set */
 	u64		cap;
 	u64		ecap;
 	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
-- 
cgit v1.2.3


From 7eb9ba5ed312ec6ed9d22259c5da1acb7cf4bd29 Mon Sep 17 00:00:00 2001
From: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Date: Fri, 8 Jun 2012 15:02:57 +0530
Subject: uprobes: Pass probed vaddr to arch_uprobe_analyze_insn()

On RISC architectures like powerpc, instructions are fixed size.
Instruction analysis on such platforms is just a matter of
(insn % 4). Pass the vaddr at which the uprobe is to be inserted so
that arch_uprobe_analyze_insn() can flag misaligned registration
requests.

Signed-off-by: Ananth N Mavinakaynahalli <ananth@in.ibm.com>
Cc: michael@ellerman.id.au
Cc: antonb@thinktux.localdomain
Cc: Paul Mackerras <paulus@samba.org>
Cc: benh@kernel.crashing.org
Cc: peterz@infradead.org
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: oleg@redhat.com
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20120608093257.GG13409@in.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/uprobes.h | 2 +-
 arch/x86/kernel/uprobes.c      | 3 ++-
 kernel/events/uprobes.c        | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 1e9bed14f7ae..f3971bbcd1de 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -48,7 +48,7 @@ struct arch_uprobe_task {
 #endif
 };
 
-extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
+extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
 extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index dc4e910a7d96..36fd42091fa7 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -409,9 +409,10 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
  * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
  * @mm: the probed address space.
  * @arch_uprobe: the probepoint information.
+ * @addr: virtual address at which to install the probepoint
  * Return 0 on success or a -ve number on error.
  */
-int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
 {
 	int ret;
 	struct insn insn;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 8c5e043cd309..b52376d02332 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -706,7 +706,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
 			return -EEXIST;
 
-		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm);
+		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, addr);
 		if (ret)
 			return ret;
 
-- 
cgit v1.2.3


From c7d65a78fc18ed70353baeb7497ec71a7c775ac5 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 7 Jun 2012 11:03:00 -0400
Subject: x86: Remove cmpxchg from i386 NMI nesting code

I've been informed by someone on LWN called 'slashdot' that
some i386 machines do not support a true cmpxchg. The cmpxchg
used by the i386 NMI nesting code must be a true cmpxchg as
disabling interrupts will not work for NMIs (which is the work
around for i386s that do not have a true cmpxchg).

This 'slashdot' character also suggested a fix to the issue.
As the state of the nesting NMIs goes as follows:

  NOT_RUNNING -> EXECUTING
  EXECUTING   -> NOT_RUNNING
  EXECUTING   -> LATCHED
  LATCHED     -> EXECUTING

Having these states as enum values of:

  NOT_RUNNING = 0
  EXECUTING   = 1
  LATCHED     = 2

Instead of a cmpxchg to make EXECUTING -> NOT_RUNNING a
dec_and_test() would work as well. If the dec_and_test brings
the state to NOT_RUNNING, that is the same as a cmpxchg
succeeding to change EXECUTING to NOT_RUNNING. If a nested NMI
were to come in and change it to LATCHED, the dec_and_test() would
convert the state to EXECUTING (what we want it to be in such a
case anyway).

I asked 'slashdot' to post this as a patch, but it never came to
be. I decided to do the work instead.

Thanks to H. Peter Anvin for suggesting to use this_cpu_dec_and_return()
instead of local_dec_and_test(&__get_cpu_var()).

Link: http://lwn.net/Articles/484932/

Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/nmi.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index a0b2f84457be..a15a88800661 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
 #ifdef CONFIG_X86_32
 /*
  * For i386, NMIs use the same stack as the kernel, and we can
- * add a workaround to the iret problem in C. Simply have 3 states
- * the NMI can be in.
+ * add a workaround to the iret problem in C (preventing nested
+ * NMIs if an NMI takes a trap). Simply have 3 states the NMI
+ * can be in:
  *
  *  1) not running
  *  2) executing
@@ -383,13 +384,20 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
  * If an NMI hits a breakpoint that executes an iret, another
  * NMI can preempt it. We do not want to allow this new NMI
  * to run, but we want to execute it when the first one finishes.
- * We set the state to "latched", and the first NMI will perform
- * an cmpxchg on the state, and if it doesn't successfully
- * reset the state to "not running" it will restart the next
- * NMI.
+ * We set the state to "latched", and the exit of the first NMI will
+ * perform a dec_return, if the result is zero (NOT_RUNNING), then
+ * it will simply exit the NMI handler. If not, the dec_return
+ * would have set the state to NMI_EXECUTING (what we want it to
+ * be when we are running). In this case, we simply jump back
+ * to rerun the NMI handler again, and restart the 'latched' NMI.
+ *
+ * No trap (breakpoint or page fault) should be hit before nmi_restart,
+ * thus there is no race between the first check of state for NOT_RUNNING
+ * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
+ * at this point.
  */
 enum nmi_states {
-	NMI_NOT_RUNNING,
+	NMI_NOT_RUNNING = 0,
 	NMI_EXECUTING,
 	NMI_LATCHED,
 };
@@ -397,18 +405,17 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state);
 
 #define nmi_nesting_preprocess(regs)					\
 	do {								\
-		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
-			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
+		if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {	\
+			this_cpu_write(nmi_state, NMI_LATCHED);		\
 			return;						\
 		}							\
-	nmi_restart:							\
-		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
-	} while (0)
+		this_cpu_write(nmi_state, NMI_EXECUTING);		\
+	} while (0);							\
+	nmi_restart:
 
 #define nmi_nesting_postprocess()					\
 	do {								\
-		if (cmpxchg(&__get_cpu_var(nmi_state),			\
-		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
+		if (this_cpu_dec_return(nmi_state))			\
 			goto nmi_restart;				\
 	} while (0)
 #else /* x86_64 */
-- 
cgit v1.2.3


From 70fb74a5420f9caa3e001d65004e4b669124283e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 7 Jun 2012 11:54:37 -0400
Subject: x86: Save cr2 in NMI in case NMIs take a page fault (for i386)

Avi Kivity reported that page faults in NMIs could cause havic if
the NMI preempted another page fault handler:

   The recent changes to NMI allow exceptions to take place in NMI
   handlers, but I think that a #PF (say, due to access to vmalloc space)
   is still problematic.  Consider the sequence

    #PF  (cr2 set by processor)
      NMI
        ...
        #PF (cr2 clobbered)
          do_page_fault()
          IRET
        ...
        IRET
      do_page_fault()
        address = read_cr2()

   The last line reads the overwritten cr2 value.

This is the i386 version, which has the luxury of doing the work
in C code.

Link: http://lkml.kernel.org/r/4FBB8C40.6080304@redhat.com

Reported-by: Avi Kivity <avi@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/nmi.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index a15a88800661..f84f5c57de35 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -395,6 +395,14 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
  * thus there is no race between the first check of state for NOT_RUNNING
  * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
  * at this point.
+ *
+ * In case the NMI takes a page fault, we need to save off the CR2
+ * because the NMI could have preempted another page fault and corrupt
+ * the CR2 that is about to be read. As nested NMIs must be restarted
+ * and they can not take breakpoints or page faults, the update of the
+ * CR2 must be done before converting the nmi state back to NOT_RUNNING.
+ * Otherwise, there would be a race of another nested NMI coming in
+ * after setting state to NOT_RUNNING but before updating the nmi_cr2.
  */
 enum nmi_states {
 	NMI_NOT_RUNNING = 0,
@@ -402,6 +410,7 @@ enum nmi_states {
 	NMI_LATCHED,
 };
 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+static DEFINE_PER_CPU(unsigned long, nmi_cr2);
 
 #define nmi_nesting_preprocess(regs)					\
 	do {								\
@@ -410,11 +419,14 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state);
 			return;						\
 		}							\
 		this_cpu_write(nmi_state, NMI_EXECUTING);		\
+		this_cpu_write(nmi_cr2, read_cr2());			\
 	} while (0);							\
 	nmi_restart:
 
 #define nmi_nesting_postprocess()					\
 	do {								\
+		if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))	\
+			write_cr2(this_cpu_read(nmi_cr2));		\
 		if (this_cpu_dec_return(nmi_state))			\
 			goto nmi_restart;				\
 	} while (0)
-- 
cgit v1.2.3


From e9071b0be5e7ce4903b7f7c370769d485774d3e3 Mon Sep 17 00:00:00 2001
From: Donald Dutile <ddutile@redhat.com>
Date: Fri, 8 Jun 2012 17:13:11 -0400
Subject: iommu/dmar: Use pr_format() instead of PREFIX to tidy up pr_*() calls

Joe Perches recommended getting rid of the redundant
formatting of adding "PREFIX" to all the uses of pr_*() calls.

The recommendation helps to reduce source and improve
readibility.

While cleaning up the PREFIX's, I saw that one of
the pr_warn() was redundant in dmar_parse_one_dev_scope(),
since the same message was printed after breaking out of the
while loop for the same condition, !pdev.
So, to avoid a duplicate message, I removed the one in the while
loop.

Reported-by: Joe Perches <joe@perches.com>
Signed-off-by: Donald Dutile <ddutile@redhat.com>
Cc: iommu@lists.linux-foundation.org
Cc: chrisw@redhat.com
Cc: suresh.b.siddha@intel.com
Cc: dwmw2@infradead.org
Link: http://lkml.kernel.org/r/1339189991-13129-1-git-send-email-ddutile@redhat.com
[ Small whitespace fixes. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/iommu/dmar.c | 54 +++++++++++++++++++++++-----------------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 9ab6ebf46f7a..86e2f4a62b9a 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -26,6 +26,8 @@
  * These routines are used by both DMA-remapping and Interrupt-remapping
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */
+
 #include <linux/pci.h>
 #include <linux/dmar.h>
 #include <linux/iova.h>
@@ -39,8 +41,6 @@
 #include <asm/irq_remapping.h>
 #include <asm/iommu_table.h>
 
-#define PREFIX "DMAR: "
-
 /* No locks are needed as DMA remapping hardware unit
  * list is constructed at boot time and hotplug of
  * these units are not supported by the architecture.
@@ -83,15 +83,12 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
 		 * ignore it
 		 */
 		if (!bus) {
-			pr_warn(PREFIX "Device scope bus [%d] not found\n",
-				scope->bus);
+			pr_warn("Device scope bus [%d] not found\n", scope->bus);
 			break;
 		}
 		pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
 		if (!pdev) {
-			pr_warn(PREFIX "Device scope device"
-				"[%04x:%02x:%02x.%02x] not found\n",
-				segment, bus->number, path->dev, path->fn);
+			/* warning will be printed below */
 			break;
 		}
 		path ++;
@@ -99,8 +96,7 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
 		bus = pdev->subordinate;
 	}
 	if (!pdev) {
-		pr_warn(PREFIX
-			"Device scope device [%04x:%02x:%02x.%02x] not found\n",
+		pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n",
 			segment, scope->bus, path->dev, path->fn);
 		*dev = NULL;
 		return 0;
@@ -109,8 +105,8 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
 			pdev->subordinate) || (scope->entry_type == \
 			ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
 		pci_dev_put(pdev);
-		pr_warn(PREFIX "Device scope type does not match for %s\n",
-			 pci_name(pdev));
+		pr_warn("Device scope type does not match for %s\n",
+			pci_name(pdev));
 		return -EINVAL;
 	}
 	*dev = pdev;
@@ -132,7 +128,7 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
 			(*cnt)++;
 		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
-			pr_warn(PREFIX "Unsupported device scope\n");
+			pr_warn("Unsupported device scope\n");
 		}
 		start += scope->length;
 	}
@@ -258,23 +254,23 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
 		drhd = container_of(header, struct acpi_dmar_hardware_unit,
 				    header);
-		pr_info(PREFIX "DRHD base: %#016Lx flags: %#x\n",
+		pr_info("DRHD base: %#016Lx flags: %#x\n",
 			(unsigned long long)drhd->address, drhd->flags);
 		break;
 	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
 		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
 				    header);
-		pr_info(PREFIX "RMRR base: %#016Lx end: %#016Lx\n",
+		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
 			(unsigned long long)rmrr->base_address,
 			(unsigned long long)rmrr->end_address);
 		break;
 	case ACPI_DMAR_TYPE_ATSR:
 		atsr = container_of(header, struct acpi_dmar_atsr, header);
-		pr_info(PREFIX "ATSR flags: %#x\n", atsr->flags);
+		pr_info("ATSR flags: %#x\n", atsr->flags);
 		break;
 	case ACPI_DMAR_HARDWARE_AFFINITY:
 		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
-		pr_info(PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
+		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
 		       (unsigned long long)rhsa->base_address,
 		       rhsa->proximity_domain);
 		break;
@@ -294,7 +290,7 @@ static int __init dmar_table_detect(void)
 				&dmar_tbl_size);
 
 	if (ACPI_SUCCESS(status) && !dmar_tbl) {
-		pr_warn(PREFIX "Unable to map DMAR\n");
+		pr_warn("Unable to map DMAR\n");
 		status = AE_NOT_FOUND;
 	}
 
@@ -328,18 +324,18 @@ parse_dmar_table(void)
 		return -ENODEV;
 
 	if (dmar->width < PAGE_SHIFT - 1) {
-		pr_warn(PREFIX "Invalid DMAR haw\n");
+		pr_warn("Invalid DMAR haw\n");
 		return -EINVAL;
 	}
 
-	pr_info(PREFIX "Host address width %d\n", dmar->width + 1);
+	pr_info("Host address width %d\n", dmar->width + 1);
 
 	entry_header = (struct acpi_dmar_header *)(dmar + 1);
 	while (((unsigned long)entry_header) <
 			(((unsigned long)dmar) + dmar_tbl->length)) {
 		/* Avoid looping forever on bad ACPI tables */
 		if (entry_header->length == 0) {
-			pr_warn(PREFIX "Invalid 0-length structure\n");
+			pr_warn("Invalid 0-length structure\n");
 			ret = -EINVAL;
 			break;
 		}
@@ -362,7 +358,7 @@ parse_dmar_table(void)
 #endif
 			break;
 		default:
-			pr_warn(PREFIX "Unknown DMAR structure type %d\n",
+			pr_warn("Unknown DMAR structure type %d\n",
 				entry_header->type);
 			ret = 0; /* for forward compatibility */
 			break;
@@ -461,12 +457,12 @@ int __init dmar_table_init(void)
 	ret = parse_dmar_table();
 	if (ret) {
 		if (ret != -ENODEV)
-			pr_info(PREFIX "parse DMAR table failure.\n");
+			pr_info("parse DMAR table failure.\n");
 		return ret;
 	}
 
 	if (list_empty(&dmar_drhd_units)) {
-		pr_info(PREFIX "No DMAR devices found\n");
+		pr_info("No DMAR devices found\n");
 		return -ENODEV;
 	}
 
@@ -498,7 +494,7 @@ int __init check_zero_address(void)
 			(((unsigned long)dmar) + dmar_tbl->length)) {
 		/* Avoid looping forever on bad ACPI tables */
 		if (entry_header->length == 0) {
-			pr_warn(PREFIX "Invalid 0-length structure\n");
+			pr_warn("Invalid 0-length structure\n");
 			return 0;
 		}
 
@@ -549,8 +545,7 @@ int __init detect_intel_iommu(void)
 
 		if (ret && irq_remapping_enabled && cpu_has_x2apic &&
 		    dmar->flags & 0x1)
-			pr_info("Queued invalidation will be enabled to "
-				"support x2apic and Intr-remapping.\n");
+			pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
 
 		if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
 			iommu_detected = 1;
@@ -580,9 +575,9 @@ static void unmap_iommu(struct intel_iommu *iommu)
  * map_iommu: map the iommu's registers
  * @iommu: the iommu to map
  * @phys_addr: the physical address of the base resgister
- * 
+ *
  * Memory map the iommu's registers.  Start w/ a single page, and
- * possibly expand if that turns out to be insufficent.  
+ * possibly expand if that turns out to be insufficent.
  */
 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
 {
@@ -1302,8 +1297,7 @@ int __init enable_drhd_fault_handling(void)
 		ret = dmar_set_interrupt(iommu);
 
 		if (ret) {
-			pr_err("DRHD %Lx: failed to enable fault, "
-			       " interrupt, ret %d\n",
+			pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
 			       (unsigned long long)drhd->reg_base_addr, ret);
 			return -1;
 		}
-- 
cgit v1.2.3


From e2b297fcf17fc03734e93387fb8195c782286b35 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkhan@gmail.com>
Date: Sun, 10 Jun 2012 21:13:41 -0600
Subject: perf/x86: Convert obsolete simple_strtoul() usage to kstrtoul()

Signed-off-by: Shuah Khan <shuahkhan@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/1339384421.3025.8.camel@lorien2
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 000a4746c7ce..766c76d5ec4c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1640,7 +1640,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
 			      struct device_attribute *attr,
 			      const char *buf, size_t count)
 {
-	unsigned long val = simple_strtoul(buf, NULL, 0);
+	unsigned long val;
+	ssize_t ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
 
 	if (!!val != !!x86_pmu.attr_rdpmc) {
 		x86_pmu.attr_rdpmc = !!val;
-- 
cgit v1.2.3


From 110c1e1f1bf61e5dca53ff5c9dc75243ce87c002 Mon Sep 17 00:00:00 2001
From: Ravikiran Thirumalai <kiran.thirumalai@gmail.com>
Date: Sun, 3 Jun 2012 01:11:35 +0300
Subject: x86/vsmp: Ignore IOAPIC IRQ affinity if possible

vSMP can route interrupts more optimally based on internal
knowledge the OS does not have. In order to support this
optimization, all CPUs must be able to handle all possible
IOAPIC interrupts.

Fix this by setting the vector allocation domain for all CPUs
and by enabling this feature in vSMP.

Signed-off-by: Ravikiran Thirumalai <kiran.thirumalai@gmail.com>
Signed-off-by: Shai Fultheim <shai@scalemp.com>
[ Rebased, simplified, and reworded the commit message. ]
Signed-off-by: Ido Yariv <ido@wizery.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/vsmp_64.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 59eea855f451..6b96a7374f94 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -16,6 +16,7 @@
 #include <linux/pci_ids.h>
 #include <linux/pci_regs.h>
 #include <linux/smp.h>
+#include <linux/irq.h>
 
 #include <asm/apic.h>
 #include <asm/pci-direct.h>
@@ -95,6 +96,15 @@ static void __init set_vsmp_pv_ops(void)
 	ctl = readl(address + 4);
 	printk(KERN_INFO "vSMP CTL: capabilities:0x%08x  control:0x%08x\n",
 	       cap, ctl);
+
+	/* If possible, let the vSMP foundation route the interrupt optimally */
+#ifdef CONFIG_SMP
+	if (cap & ctl & BIT(8)) {
+		ctl &= ~BIT(8);
+		no_irq_affinity = 1;
+	}
+#endif
+
 	if (cap & ctl & (1 << 4)) {
 		/* Setup irq ops and turn on vSMP  IRQ fastpath handling */
 		pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
@@ -102,12 +112,11 @@ static void __init set_vsmp_pv_ops(void)
 		pv_irq_ops.save_fl  = PV_CALLEE_SAVE(vsmp_save_fl);
 		pv_irq_ops.restore_fl  = PV_CALLEE_SAVE(vsmp_restore_fl);
 		pv_init_ops.patch = vsmp_patch;
-
 		ctl &= ~(1 << 4);
-		writel(ctl, address + 4);
-		ctl = readl(address + 4);
-		printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl);
 	}
+	writel(ctl, address + 4);
+	ctl = readl(address + 4);
+	pr_info("vSMP CTL: control set to:0x%08x\n", ctl);
 
 	early_iounmap(address, 8);
 }
@@ -192,10 +201,20 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
 	return hard_smp_processor_id() >> index_msb;
 }
 
+/*
+ * In vSMP, all cpus should be capable of handling interrupts, regardless of
+ * the APIC used.
+ */
+static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+	cpumask_setall(retmask);
+}
+
 static void vsmp_apic_post_init(void)
 {
 	/* need to update phys_pkg_id */
 	apic->phys_pkg_id = apicid_phys_pkg_id;
+	apic->vector_allocation_domain = fill_vector_allocation_domain;
 }
 
 void __init vsmp_init(void)
-- 
cgit v1.2.3


From b871a42b6091b720e82ddff237659534c525c25b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 11 Jun 2012 15:07:08 +0200
Subject: smpboot: Remove leftover declaration

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/smpboot.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/smpboot.h b/kernel/smpboot.h
index 80c0acfb8472..6ef9433e1c70 100644
--- a/kernel/smpboot.h
+++ b/kernel/smpboot.h
@@ -3,8 +3,6 @@
 
 struct task_struct;
 
-int smpboot_prepare(unsigned int cpu);
-
 #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD
 struct task_struct *idle_thread_get(unsigned int cpu);
 void idle_thread_set_boot_cpu(void);
-- 
cgit v1.2.3


From ed88bed881c9948c4035828c5d63f60c7b015f86 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 12 Jun 2012 19:26:33 +0300
Subject: x86/apic/irq_remap: Silence a bogus pr_err()

There is an extra semicolon here so the pr_err() message is
printed when it is not intended.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Link: http://lkml.kernel.org/r/20120612162633.GA11077@elgon.mountain
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/iommu/intel_irq_remapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index dafbad06390a..853902a1b7db 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -938,7 +938,7 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 
 	err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest);
 	if (err) {
-		if (assign_irq_vector(irq, cfg, data->affinity));
+		if (assign_irq_vector(irq, cfg, data->affinity))
 			pr_err("Failed to recover vector for irq %d\n", irq);
 		return err;
 	}
-- 
cgit v1.2.3


From 2f74759056797054122cdc70844137f70bb3f626 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 7 Jun 2012 22:20:18 +0900
Subject: x86/alternatives: Use atomic_xchg() instead atomic_dec_and_test() for
 stop_machine_text_poke()

stop_machine_text_poke() uses atomic_dec_and_test() to select one of
the CPUs executing that function to actually modify the code.

Since the variable is initialized to 1, subsequent CPUs will make the
variable go negative. Since going negative is uncommon/unexpected in
typical dec_and_test usage change this user to atomic_xchg().

This was found using a patch that warns on dec_and_test going
negative.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
[ Rewrote changelog ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/87zk8fgsx9.fsf@devron.myhome.or.jp
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/alternative.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 1f84794f0759..53231a045d3d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -664,7 +664,7 @@ static int __kprobes stop_machine_text_poke(void *data)
 	struct text_poke_param *p;
 	int i;
 
-	if (atomic_dec_and_test(&stop_machine_first)) {
+	if (atomic_xchg(&stop_machine_first, 0)) {
 		for (i = 0; i < tpp->nparams; i++) {
 			p = &tpp->params[i];
 			text_poke(p->addr, p->opcode, p->len);
-- 
cgit v1.2.3


From cac4afbc3da58d9e5701b34bd4c1f11ea13328d4 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 14 Jun 2012 12:39:34 +0200
Subject: x86/x2apic/cluster: Vector_allocation_domain() should return a value

Since commit 8637e38 ("x86/apic: Avoid useless scanning thru a
cpumask in assign_irq_vector()") vector_allocation_domain()
operation indicates if a cpumask is dynamic or static. This
update fixes the oversight and makes the operation to return a
value.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120614103933.GJ3383@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/x2apic_cluster.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 23a46cf5b6fd..1885a73b7f33 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -228,10 +228,11 @@ static int x2apic_cluster_probe(void)
 /*
  * Each x2apic cluster is an allocation domain.
  */
-static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static bool cluster_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	cpumask_clear(retmask);
 	cpumask_copy(retmask, per_cpu(cpus_in_cluster, cpu));
+	return true;
 }
 
 static struct apic apic_x2apic_cluster = {
-- 
cgit v1.2.3


From a5a391561bc25898ba1a702a0c4b028aa5b11ce9 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 14 Jun 2012 09:49:35 +0200
Subject: x86/apic: Eliminate cpu_mask_to_apicid() operation

Since there are only two locations where cpu_mask_to_apicid() is
called from, remove the operation and use only
cpu_mask_to_apicid_and() instead.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Suggested-and-acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120614074935.GE3383@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/apic.h           | 33 ++++++++-------------------------
 arch/x86/kernel/apic/apic.c           | 24 ++++++------------------
 arch/x86/kernel/apic/apic_flat_64.c   |  2 --
 arch/x86/kernel/apic/apic_noop.c      |  1 -
 arch/x86/kernel/apic/apic_numachip.c  |  1 -
 arch/x86/kernel/apic/bigsmp_32.c      |  1 -
 arch/x86/kernel/apic/es7000_32.c      |  4 +---
 arch/x86/kernel/apic/io_apic.c        |  3 ++-
 arch/x86/kernel/apic/numaq_32.c       |  8 --------
 arch/x86/kernel/apic/probe_32.c       |  1 -
 arch/x86/kernel/apic/summit_32.c      |  3 +--
 arch/x86/kernel/apic/x2apic_cluster.c | 17 -----------------
 arch/x86/kernel/apic/x2apic_phys.c    |  1 -
 arch/x86/kernel/apic/x2apic_uv_x.c    | 29 ++++++-----------------------
 arch/x86/platform/uv/uv_irq.c         |  2 +-
 15 files changed, 25 insertions(+), 105 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1ed3eead2039..eec240e12091 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -331,8 +331,6 @@ struct apic {
 	unsigned long (*set_apic_id)(unsigned int id);
 	unsigned long apic_id_mask;
 
-	int (*cpu_mask_to_apicid)(const struct cpumask *cpumask,
-				  unsigned int *apicid);
 	int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
 				      const struct cpumask *andmask,
 				      unsigned int *apicid);
@@ -594,9 +592,15 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 #endif
 
 static inline int
-__flat_cpu_mask_to_apicid(unsigned long cpu_mask, unsigned int *apicid)
+flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+			    const struct cpumask *andmask,
+			    unsigned int *apicid)
 {
-	cpu_mask = cpu_mask & APIC_ALL_CPUS & cpumask_bits(cpu_online_mask)[0];
+	unsigned long cpu_mask = cpumask_bits(cpumask)[0] &
+				 cpumask_bits(andmask)[0] &
+				 cpumask_bits(cpu_online_mask)[0] &
+				 APIC_ALL_CPUS;
+
 	if (likely(cpu_mask)) {
 		*apicid = (unsigned int)cpu_mask;
 		return 0;
@@ -605,27 +609,6 @@ __flat_cpu_mask_to_apicid(unsigned long cpu_mask, unsigned int *apicid)
 	}
 }
 
-static inline int
-flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
-			unsigned int *apicid)
-{
-	return __flat_cpu_mask_to_apicid(cpumask_bits(cpumask)[0], apicid);
-}
-
-static inline int
-flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			    const struct cpumask *andmask,
-			    unsigned int *apicid)
-{
-	unsigned long mask1 = cpumask_bits(cpumask)[0];
-	unsigned long mask2 = cpumask_bits(andmask)[0];
-	return __flat_cpu_mask_to_apicid(mask1 & mask2, apicid);
-}
-
-extern int
-default_cpu_mask_to_apicid(const struct cpumask *cpumask,
-			   unsigned int *apicid);
-
 extern int
 default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			       const struct cpumask *andmask,
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 7e9bbe73bc5a..048a4f806d46 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2123,23 +2123,6 @@ void default_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-static inline int __default_cpu_to_apicid(int cpu, unsigned int *apicid)
-{
-	if (likely((unsigned int)cpu < nr_cpu_ids)) {
-		*apicid = per_cpu(x86_cpu_to_apicid, cpu);
-		return 0;
-	} else {
-		return -EINVAL;
-	}
-}
-
-int default_cpu_mask_to_apicid(const struct cpumask *cpumask,
-			       unsigned int *apicid)
-{
-	int cpu = cpumask_first_and(cpumask, cpu_online_mask);
-	return __default_cpu_to_apicid(cpu, apicid);
-}
-
 int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 				   const struct cpumask *andmask,
 				   unsigned int *apicid)
@@ -2151,7 +2134,12 @@ int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			break;
 	}
 
-	return __default_cpu_to_apicid(cpu, apicid);
+	if (likely((unsigned int)cpu < nr_cpu_ids)) {
+		*apicid = per_cpu(x86_cpu_to_apicid, cpu);
+		return 0;
+	} else {
+		return -EINVAL;
+	}
 }
 
 /*
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index bddc92566d0a..00c77cf78e9e 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -191,7 +191,6 @@ static struct apic apic_flat =  {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xFFu << 24,
 
-	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= flat_send_IPI_mask,
@@ -308,7 +307,6 @@ static struct apic apic_physflat =  {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xFFu << 24,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= physflat_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index ac9edf247b15..65c07fc630a1 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -159,7 +159,6 @@ struct apic apic_noop = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0x0F << 24,
 
-	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= noop_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c028132ad358..bc552cff2578 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -234,7 +234,6 @@ static struct apic apic_numachip __refconst = {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xffU << 24,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= numachip_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index df342fe4d6aa..d50e3640d5ae 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -188,7 +188,6 @@ static struct apic apic_bigsmp = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0xFF << 24,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= bigsmp_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index b35cfb9b6962..2c5317ea1b83 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -525,7 +525,7 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
 	return 1;
 }
 
-static int
+static inline int
 es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 {
 	unsigned int round = 0;
@@ -643,7 +643,6 @@ static struct apic __refdata apic_es7000_cluster = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0xFF << 24,
 
-	.cpu_mask_to_apicid		= es7000_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= es7000_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= es7000_send_IPI_mask,
@@ -710,7 +709,6 @@ static struct apic __refdata apic_es7000 = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0xFF << 24,
 
-	.cpu_mask_to_apicid		= es7000_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= es7000_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= es7000_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 0deb773404e5..0540f083f452 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1492,7 +1492,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
 	 * We use logical delivery to get the timer IRQ
 	 * to the first CPU.
 	 */
-	if (unlikely(apic->cpu_mask_to_apicid(apic->target_cpus(), &dest)))
+	if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(),
+						  apic->target_cpus(), &dest)))
 		dest = BAD_APICID;
 
 	entry.dest_mode = apic->irq_dest_mode;
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 2b55514c328b..d661ee95cabf 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -406,13 +406,6 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid)
  * We use physical apicids here, not logical, so just return the default
  * physical broadcast to stop people from breaking us
  */
-static int
-numaq_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid)
-{
-	*apicid = 0x0F;
-	return 0;
-}
-
 static int
 numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			     const struct cpumask *andmask,
@@ -499,7 +492,6 @@ static struct apic __refdata apic_numaq = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0x0F << 24,
 
-	.cpu_mask_to_apicid		= numaq_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= numaq_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= numaq_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 2c6f003b2e4b..eef6bcd1bf1e 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -108,7 +108,6 @@ static struct apic apic_default = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0x0F << 24,
 
-	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= default_send_IPI_mask_logical,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 79d360f6729e..bbad180f2890 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -263,7 +263,7 @@ static int summit_check_phys_apicid_present(int physical_apicid)
 	return 1;
 }
 
-static int
+static inline int
 summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 {
 	unsigned int round = 0;
@@ -516,7 +516,6 @@ static struct apic apic_summit = {
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0xFF << 24,
 
-	.cpu_mask_to_apicid		= summit_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= summit_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= summit_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 1885a73b7f33..943d03fc6fc4 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -96,22 +96,6 @@ static void x2apic_send_IPI_all(int vector)
 	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
 }
 
-static int
-x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid)
-{
-	int cpu = cpumask_first_and(cpumask, cpu_online_mask);
-	int i;
-
-	if (cpu >= nr_cpu_ids)
-		return -EINVAL;
-
-	*apicid = 0;
-	for_each_cpu_and(i, cpumask, per_cpu(cpus_in_cluster, cpu))
-		*apicid |= per_cpu(x86_cpu_to_logical_apicid, i);
-
-	return 0;
-}
-
 static int
 x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			      const struct cpumask *andmask,
@@ -270,7 +254,6 @@ static struct apic apic_x2apic_cluster = {
 	.set_apic_id			= x2apic_set_apic_id,
 	.apic_id_mask			= 0xFFFFFFFFu,
 
-	.cpu_mask_to_apicid		= x2apic_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= x2apic_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f109388a0e80..e03a1e180e81 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -123,7 +123,6 @@ static struct apic apic_x2apic_phys = {
 	.set_apic_id			= x2apic_set_apic_id,
 	.apic_id_mask			= 0xFFFFFFFFu,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= x2apic_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 307aa076bd62..026de0114d15 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -269,27 +269,6 @@ static void uv_init_apic_ldr(void)
 {
 }
 
-static inline int __uv_cpu_to_apicid(int cpu, unsigned int *apicid)
-{
-	if (likely((unsigned int)cpu < nr_cpu_ids)) {
-		*apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
-		return 0;
-	} else {
-		return -EINVAL;
-	}
-}
-
-static int
-uv_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid)
-{
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	int cpu = cpumask_first_and(cpumask, cpu_online_mask);
-	return __uv_cpu_to_apicid(cpu, apicid);
-}
-
 static int
 uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			  const struct cpumask *andmask,
@@ -306,7 +285,12 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			break;
 	}
 
-	return __uv_cpu_to_apicid(cpu, apicid);
+	if (likely((unsigned int)cpu < nr_cpu_ids)) {
+		*apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
+		return 0;
+	} else {
+		return -EINVAL;
+	}
 }
 
 static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -384,7 +368,6 @@ static struct apic __refdata apic_x2apic_uv_x = {
 	.set_apic_id			= set_apic_id,
 	.apic_id_mask			= 0xFFFFFFFFu,
 
-	.cpu_mask_to_apicid		= uv_cpu_mask_to_apicid,
 	.cpu_mask_to_apicid_and		= uv_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask			= uv_send_IPI_mask,
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index dd1ff39a464c..a67c7a6bac7e 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -144,7 +144,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	if (err != 0)
 		return err;
 
-	err = apic->cpu_mask_to_apicid(eligible_cpu, &dest);
+	err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest);
 	if (err != 0)
 		return err;
 
-- 
cgit v1.2.3


From ea3807ea52a53f2cdfd60c89d8491fc9a8208d1c Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 14 Jun 2012 09:49:55 +0200
Subject: x86/apic: Fix ugly casting and branching in cpu_mask_to_apicid_and()

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120614074954.GF3383@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/apic.c        | 8 ++++----
 arch/x86/kernel/apic/es7000_32.c   | 2 +-
 arch/x86/kernel/apic/summit_32.c   | 2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c | 8 ++++----
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 048a4f806d46..c421512ca5eb 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2127,19 +2127,19 @@ int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 				   const struct cpumask *andmask,
 				   unsigned int *apicid)
 {
-	int cpu;
+	unsigned int cpu;
 
 	for_each_cpu_and(cpu, cpumask, andmask) {
 		if (cpumask_test_cpu(cpu, cpu_online_mask))
 			break;
 	}
 
-	if (likely((unsigned int)cpu < nr_cpu_ids)) {
+	if (likely(cpu < nr_cpu_ids)) {
 		*apicid = per_cpu(x86_cpu_to_apicid, cpu);
 		return 0;
-	} else {
-		return -EINVAL;
 	}
+
+	return -EINVAL;
 }
 
 /*
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 2c5317ea1b83..effece2ea0db 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -529,7 +529,7 @@ static inline int
 es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 {
 	unsigned int round = 0;
-	int cpu, uninitialized_var(apicid);
+	unsigned int cpu, uninitialized_var(apicid);
 
 	/*
 	 * The cpus in the mask must all be on the apic cluster.
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index bbad180f2890..b53fd6c9993a 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -267,7 +267,7 @@ static inline int
 summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 {
 	unsigned int round = 0;
-	int cpu, apicid = 0;
+	unsigned int cpu, apicid = 0;
 
 	/*
 	 * The cpus in the mask must all be on the apic cluster.
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 026de0114d15..8cfade9510a4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -274,7 +274,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			  const struct cpumask *andmask,
 			  unsigned int *apicid)
 {
-	int cpu;
+	int unsigned cpu;
 
 	/*
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
@@ -285,12 +285,12 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			break;
 	}
 
-	if (likely((unsigned int)cpu < nr_cpu_ids)) {
+	if (likely(cpu < nr_cpu_ids)) {
 		*apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
 		return 0;
-	} else {
-		return -EINVAL;
 	}
+
+	return -EINVAL;
 }
 
 static unsigned int x2apic_get_apic_id(unsigned long x)
-- 
cgit v1.2.3


From 49ad3fd4834182cce9725abb98e080b479fed464 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 14 Jun 2012 09:50:11 +0200
Subject: x86/apic/es7000+summit: Fix compile warning in cpu_mask_to_apicid()

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120614075010.GG3383@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/es7000_32.c | 2 +-
 arch/x86/kernel/apic/summit_32.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index effece2ea0db..0c1347df3ad0 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -554,8 +554,8 @@ es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
 			      const struct cpumask *andmask,
 			      unsigned int *apicid)
 {
-	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
+	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
 		return 0;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index b53fd6c9993a..e6cc1829f7c8 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -291,8 +291,8 @@ summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
 			      const struct cpumask *andmask,
 			      unsigned int *apicid)
 {
-	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
+	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
 		return 0;
-- 
cgit v1.2.3


From 214e270b5f5f6a85400a817d5305c797b2b7467a Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 14 Jun 2012 09:50:27 +0200
Subject: x86/apic/es7000+summit: Always make valid apicid from a cpumask

In case of invalid parameters cpu_mask_to_apicid_and() might
return apicid value of 0 (on Summit) or a uninitialized value
(on ES7000), although it is supposed to return apicid of cpu-0
at least. Fix the operation to always return a valid apicid.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120614075026.GH3383@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/es7000_32.c | 2 ++
 arch/x86/kernel/apic/summit_32.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 0c1347df3ad0..9882093f26e8 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -545,6 +545,8 @@ es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 		apicid = new_apicid;
 		round++;
 	}
+	if (!round)
+		return -EINVAL;
 	*dest_id = apicid;
 	return 0;
 }
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index e6cc1829f7c8..b6e61857c29f 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -282,6 +282,8 @@ summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 		apicid |= new_apicid;
 		round++;
 	}
+	if (!round)
+		return -EINVAL;
 	*dest_id = apicid;
 	return 0;
 }
-- 
cgit v1.2.3


From 5a0a2a308113086cc800a203d903271c9caa1611 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Thu, 14 Jun 2012 09:50:44 +0200
Subject: x86/apic/es7000: Make apicid of a cluster (not CPU) from a cpumask

cpu_mask_to_apicid_and() always returns apicid of a single CPU,
even in case multiple CPUs were requested. This update fixes a
typo and forces apicid of a cluster to be returned.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/20120614075043.GI3383@dhcp-26-207.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/es7000_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 9882093f26e8..0874799a98c6 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -542,7 +542,7 @@ es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 
 			return -EINVAL;
 		}
-		apicid = new_apicid;
+		apicid |= new_apicid;
 		round++;
 	}
 	if (!round)
-- 
cgit v1.2.3


From 8d240dd88cca33b704adf3fe281aa64b5aac2dd8 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@amd64.org>
Date: Thu, 29 Mar 2012 19:11:40 +0200
Subject: ftrace: Remove a superfluous check

register_ftrace_function() checks ftrace_disabled and calls
__register_ftrace_function which does it again.

Drop the first check and add the unlikely hint to the second one. Also,
drop the label as John correctly notices.

No functional change.

Link: http://lkml.kernel.org/r/20120329171140.GE6409@aftab

Cc: Borislav Petkov <bp@amd64.org>
Cc: John Kacur <jkacur@redhat.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a008663d86c8..b4f20fba09fc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -312,7 +312,7 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list,
 
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
-	if (ftrace_disabled)
+	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
 	if (FTRACE_WARN_ON(ops == &global_ops))
@@ -4299,16 +4299,12 @@ int register_ftrace_function(struct ftrace_ops *ops)
 
 	mutex_lock(&ftrace_lock);
 
-	if (unlikely(ftrace_disabled))
-		goto out_unlock;
-
 	ret = __register_ftrace_function(ops);
 	if (!ret)
 		ret = ftrace_startup(ops, 0);
 
-
- out_unlock:
 	mutex_unlock(&ftrace_lock);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_function);
-- 
cgit v1.2.3


From 5da43bed800770906fca24deef7ae3d456823b86 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 31 May 2012 21:28:58 -0400
Subject: tracing: Add comments for the other bits of ftrace_event_call.flags

	TRACE_EVENT_FL_ENABLED_BIT,
	TRACE_EVENT_FL_FILTERED_BIT,
	TRACE_EVENT_FL_RECORDED_CMD_BIT,

Have comments about what they are, but:

	TRACE_EVENT_FL_CAP_ANY_BIT,
	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,

do not, making them second class citizens. To prevent another
class warfare, these bits have protested for their right to be
commented. And By Golly! I'll give them what they want!

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 176a939d1547..1aff18346c71 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -207,6 +207,9 @@ struct ftrace_event_call {
 	 *   bit 1:		enabled
 	 *   bit 2:		filter_active
 	 *   bit 3:		enabled cmd record
+	 *   bit 4:		allow trace by non root (cap any)
+	 *   bit 5:		failed to apply filter
+	 *   bit 6:		ftrace internal event (do not enable)
 	 *
 	 * Changes to flags must hold the event_mutex.
 	 *
-- 
cgit v1.2.3


From 7374e82771c6d5a9af2080be46f64a5826c7efb1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 31 May 2012 21:40:05 -0400
Subject: tracing: Register the ftrace internal events during early boot

All trace events including ftrace internel events (like trace_printk
and function tracing), register functions that describe how to print
their output. The events may be recorded as soon as the ring buffer
is allocated, but they are just raw binary in the buffer. The mapping
of event ids to how to print them are held within a structure that
is registered on system boot.

If a crash happens in boot up before these functions are registered
then their output (via ftrace_dump_on_oops) will be useless:

Dumping ftrace buffer:
---------------------------------
   <...>-1       0.... 319705us : Unknown type 6
---------------------------------

This can be quite frustrating for a kernel developer trying to see
what is going wrong.

There's no reason to register them so late in the boot up process.
They can be registered by early_initcall().

Reported-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index df611a0e76c5..123b189c732c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1325,4 +1325,4 @@ __init static int init_events(void)
 
 	return 0;
 }
-device_initcall(init_events);
+early_initcall(init_events);
-- 
cgit v1.2.3


From d48daf37a3d2e2b28a61e615c0fc538301edb0dd Mon Sep 17 00:00:00 2001
From: Ido Yariv <ido@wizery.com>
Date: Thu, 14 Jun 2012 18:43:08 +0300
Subject: x86/vsmp: Fix linker error when CONFIG_PROC_FS is not set

set_vsmp_pv_ops() references no_irq_affinity which is undeclared
if CONFIG_PROC_FS isn't set. Fix this by adding an #ifdef around
this variable's access.

Reported-by: Fengguang Wu <wfg@linux.intel.com>
Signed-off-by: Ido Yariv <ido@wizery.com>
Acked-by: Shai Fultheim <shai@scalemp.com>
Link: http://lkml.kernel.org/r/1339688588-12674-1-git-send-email-ido@wizery.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/vsmp_64.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 6b96a7374f94..3f0285ac00fa 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -101,7 +101,10 @@ static void __init set_vsmp_pv_ops(void)
 #ifdef CONFIG_SMP
 	if (cap & ctl & BIT(8)) {
 		ctl &= ~BIT(8);
+#ifdef CONFIG_PROC_FS
+		/* Don't let users change irq affinity via procfs */
 		no_irq_affinity = 1;
+#endif
 	}
 #endif
 
-- 
cgit v1.2.3


From 7eb9ae0799b1e9f0b77733b432bc5f6f055b020b Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 14 Jun 2012 18:28:49 -0700
Subject: irq/apic: Use config_enabled(CONFIG_SMP) checks to clean up
 irq_set_affinity() for UP

Move the ->irq_set_affinity() routines out of the #ifdef CONFIG_SMP
sections and use config_enabled(CONFIG_SMP) checks inside those
routines. Thus making those routines simple null stubs for
!CONFIG_SMP and retaining those routines with no additional
runtime overhead for CONFIG_SMP kernels.

Cleans up the ifdef CONFIG_SMP in and around routines related to
irq_set_affinity in io_apic and irq_remapping subsystems.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: torvalds@linux-foundation.org
Cc: joerg.roedel@amd.com
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Link: http://lkml.kernel.org/r/1339723729.3475.63.camel@sbsiddha-desk.sc.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/io_apic.c      | 180 ++++++++++++++++--------------------
 drivers/iommu/intel_irq_remapping.c |   7 +-
 drivers/iommu/irq_remapping.c       |   5 +-
 drivers/iommu/irq_remapping.h       |   2 -
 include/linux/irq.h                 |   2 -
 5 files changed, 86 insertions(+), 110 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7cbd397884f5..a951ef7decb1 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2224,81 +2224,6 @@ void send_cleanup_vector(struct irq_cfg *cfg)
 	cfg->move_in_progress = 0;
 }
 
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
-	int apic, pin;
-	struct irq_pin_list *entry;
-	u8 vector = cfg->vector;
-
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
-		unsigned int reg;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		/*
-		 * With interrupt-remapping, destination information comes
-		 * from interrupt-remapping table entry.
-		 */
-		if (!irq_remapped(cfg))
-			io_apic_write(apic, 0x11 + pin*2, dest);
-		reg = io_apic_read(apic, 0x10 + pin*2);
-		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin*2, reg);
-	}
-}
-
-/*
- * Either sets data->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves data->affinity untouched.
- */
-int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
-			  unsigned int *dest_id)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int irq = data->irq;
-	int err;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return -EINVAL;
-
-	err = assign_irq_vector(irq, cfg, mask);
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
-	if (err) {
-		if (assign_irq_vector(irq, cfg, data->affinity))
-			pr_err("Failed to recover vector for irq %d\n", irq);
-		return err;
-	}
-
-	cpumask_copy(data->affinity, mask);
-
-	return 0;
-}
-
-static int
-ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
-		    bool force)
-{
-	unsigned int dest, irq = data->irq;
-	unsigned long flags;
-	int ret;
-
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (!ret) {
-		/* Only the high 8 bits are valid. */
-		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, data->chip_data);
-		ret = IRQ_SET_MASK_OK_NOCOPY;
-	}
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-	return ret;
-}
-
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
@@ -2386,6 +2311,87 @@ void irq_force_complete_move(int irq)
 static inline void irq_complete_move(struct irq_cfg *cfg) { }
 #endif
 
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
+
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
+		unsigned int reg;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(cfg))
+			io_apic_write(apic, 0x11 + pin*2, dest);
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+		reg |= vector;
+		io_apic_modify(apic, 0x10 + pin*2, reg);
+	}
+}
+
+/*
+ * Either sets data->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
+ * leaves data->affinity untouched.
+ */
+int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+			  unsigned int *dest_id)
+{
+	struct irq_cfg *cfg = data->chip_data;
+	unsigned int irq = data->irq;
+	int err;
+
+	if (!config_enabled(CONFIG_SMP))
+		return -1;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return -EINVAL;
+
+	err = assign_irq_vector(irq, cfg, mask);
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+	if (err) {
+		if (assign_irq_vector(irq, cfg, data->affinity))
+			pr_err("Failed to recover vector for irq %d\n", irq);
+		return err;
+	}
+
+	cpumask_copy(data->affinity, mask);
+
+	return 0;
+}
+
+static int
+ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		    bool force)
+{
+	unsigned int dest, irq = data->irq;
+	unsigned long flags;
+	int ret;
+
+	if (!config_enabled(CONFIG_SMP))
+		return -1;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	ret = __ioapic_set_affinity(data, mask, &dest);
+	if (!ret) {
+		/* Only the high 8 bits are valid. */
+		dest = SET_APIC_LOGICAL_ID(dest);
+		__target_IO_APIC_irq(irq, dest, data->chip_data);
+		ret = IRQ_SET_MASK_OK_NOCOPY;
+	}
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	return ret;
+}
+
 static void ack_apic_edge(struct irq_data *data)
 {
 	irq_complete_move(data->chip_data);
@@ -2565,9 +2571,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
 	chip->irq_ack = ir_ack_apic_edge;
 	chip->irq_eoi = ir_ack_apic_level;
 
-#ifdef CONFIG_SMP
 	chip->irq_set_affinity = set_remapped_irq_affinity;
-#endif
 }
 #endif /* CONFIG_IRQ_REMAP */
 
@@ -2578,9 +2582,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.irq_unmask		= unmask_ioapic_irq,
 	.irq_ack		= ack_apic_edge,
 	.irq_eoi		= ack_apic_level,
-#ifdef CONFIG_SMP
 	.irq_set_affinity	= ioapic_set_affinity,
-#endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
@@ -3099,7 +3101,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
 	return err;
 }
 
-#ifdef CONFIG_SMP
 static int
 msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 {
@@ -3121,7 +3122,6 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 
 	return IRQ_SET_MASK_OK_NOCOPY;
 }
-#endif /* CONFIG_SMP */
 
 /*
  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
@@ -3132,9 +3132,7 @@ static struct irq_chip msi_chip = {
 	.irq_unmask		= unmask_msi_irq,
 	.irq_mask		= mask_msi_irq,
 	.irq_ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
 	.irq_set_affinity	= msi_set_affinity,
-#endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
@@ -3219,7 +3217,6 @@ void native_teardown_msi_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_DMAR_TABLE
-#ifdef CONFIG_SMP
 static int
 dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
 		      bool force)
@@ -3244,16 +3241,12 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
-#endif /* CONFIG_SMP */
-
 static struct irq_chip dmar_msi_type = {
 	.name			= "DMAR_MSI",
 	.irq_unmask		= dmar_msi_unmask,
 	.irq_mask		= dmar_msi_mask,
 	.irq_ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
 	.irq_set_affinity	= dmar_msi_set_affinity,
-#endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
@@ -3274,7 +3267,6 @@ int arch_setup_dmar_msi(unsigned int irq)
 
 #ifdef CONFIG_HPET_TIMER
 
-#ifdef CONFIG_SMP
 static int hpet_msi_set_affinity(struct irq_data *data,
 				 const struct cpumask *mask, bool force)
 {
@@ -3297,16 +3289,12 @@ static int hpet_msi_set_affinity(struct irq_data *data,
 	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
-#endif /* CONFIG_SMP */
-
 static struct irq_chip hpet_msi_type = {
 	.name = "HPET_MSI",
 	.irq_unmask = hpet_msi_unmask,
 	.irq_mask = hpet_msi_mask,
 	.irq_ack = ack_apic_edge,
-#ifdef CONFIG_SMP
 	.irq_set_affinity = hpet_msi_set_affinity,
-#endif
 	.irq_retrigger = ioapic_retrigger_irq,
 };
 
@@ -3341,8 +3329,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
  */
 #ifdef CONFIG_HT_IRQ
 
-#ifdef CONFIG_SMP
-
 static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
 	struct ht_irq_msg msg;
@@ -3370,16 +3356,12 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
-#endif
-
 static struct irq_chip ht_irq_chip = {
 	.name			= "PCI-HT",
 	.irq_mask		= mask_ht_irq,
 	.irq_unmask		= unmask_ht_irq,
 	.irq_ack		= ack_apic_edge,
-#ifdef CONFIG_SMP
 	.irq_set_affinity	= ht_set_affinity,
-#endif
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 853902a1b7db..e0b18f3ae9a8 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -902,7 +902,6 @@ static int intel_setup_ioapic_entry(int irq,
 	return 0;
 }
 
-#ifdef CONFIG_SMP
 /*
  * Migrate the IO-APIC irq in the presence of intr-remapping.
  *
@@ -926,6 +925,9 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	struct irte irte;
 	int err;
 
+	if (!config_enabled(CONFIG_SMP))
+		return -EINVAL;
+
 	if (!cpumask_intersects(mask, cpu_online_mask))
 		return -EINVAL;
 
@@ -963,7 +965,6 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	cpumask_copy(data->affinity, mask);
 	return 0;
 }
-#endif
 
 static void intel_compose_msi_msg(struct pci_dev *pdev,
 				  unsigned int irq, unsigned int dest,
@@ -1065,9 +1066,7 @@ struct irq_remap_ops intel_irq_remap_ops = {
 	.reenable		= reenable_irq_remapping,
 	.enable_faulting	= enable_drhd_fault_handling,
 	.setup_ioapic_entry	= intel_setup_ioapic_entry,
-#ifdef CONFIG_SMP
 	.set_affinity		= intel_ioapic_set_affinity,
-#endif
 	.free_irq		= free_irte,
 	.compose_msi_msg	= intel_compose_msi_msg,
 	.msi_alloc_irq		= intel_msi_alloc_irq,
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 40cda8e98d87..1d29b1c66e72 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -111,16 +111,15 @@ int setup_ioapic_remapped_entry(int irq,
 					     vector, attr);
 }
 
-#ifdef CONFIG_SMP
 int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask,
 			      bool force)
 {
-	if (!remap_ops || !remap_ops->set_affinity)
+	if (!config_enabled(CONFIG_SMP) || !remap_ops ||
+	    !remap_ops->set_affinity)
 		return 0;
 
 	return remap_ops->set_affinity(data, mask, force);
 }
-#endif
 
 void free_remapped_irq(int irq)
 {
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index be9d72950c51..b12974cc1dfe 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -59,11 +59,9 @@ struct irq_remap_ops {
 				  unsigned int, int,
 				  struct io_apic_irq_attr *);
 
-#ifdef CONFIG_SMP
 	/* Set the CPU affinity of a remapped interrupt */
 	int (*set_affinity)(struct irq_data *data, const struct cpumask *mask,
 			    bool force);
-#endif
 
 	/* Free an IRQ */
 	int (*free_irq)(int);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 61f5cec031e0..47a937cd84af 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -150,9 +150,7 @@ struct irq_data {
 	void			*handler_data;
 	void			*chip_data;
 	struct msi_desc		*msi_desc;
-#ifdef CONFIG_SMP
 	cpumask_var_t		affinity;
-#endif
 };
 
 /*
-- 
cgit v1.2.3


From ea131377148cdfe90641b42ae9aa5a6b3a4fa327 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:22 +0200
Subject: uprobes: Valid_vma() should reject VM_HUGETLB

__replace_page() obviously can't work with the hugetlbfs
mappings, uprobe_register() will likely crash the kernel. Change
valid_vma() to check VM_HUGETLB as well.

As for PageTransHuge() no need to worry, vma->vm_file != NULL.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154322.GA9561@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b52376d02332..f0d04530af63 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -99,7 +99,8 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register)
 	if (!is_register)
 		return true;
 
-	if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
+	if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED))
+				== (VM_READ|VM_EXEC))
 		return true;
 
 	return false;
-- 
cgit v1.2.3


From cc359d180fa9c25a4c1819f17e07a422d788353d Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:25 +0200
Subject: uprobes: __copy_insn() should ensure a_ops->readpage != NULL

__copy_insn() blindly calls read_mapping_page(), this will crash
the kernel if ->readpage == NULL, add the necessary check. For
example, hugetlbfs_aops->readpage is NULL. Perhaps we should
change read_mapping_page() instead.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154325.GA9568@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f0d04530af63..604930bf9c92 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -610,6 +610,9 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *ins
 	if (!filp)
 		return -EINVAL;
 
+	if (!mapping->a_ops->readpage)
+		return -EIO;
+
 	idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
 	off1 = offset &= ~PAGE_MASK;
 
-- 
cgit v1.2.3


From 5323ce71e4b4e1f188ebbc0cc7776885ea6c75fb Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:28 +0200
Subject: uprobes: Write_opcode()->__replace_page() can race with
 try_to_unmap()

write_opcode() gets old_page via get_user_pages() and then calls
__replace_page() which assumes that this old_page is still
mapped after pte_offset_map_lock().

This is not true if this old_page was already try_to_unmap()'ed,
and in this case everything __replace_page() does with old_page
is wrong. Just for example, put_page() is not balanced.

I think it is possible to teach __replace_page() to handle this
unlikely case correctly, but this patch simply changes it to use
page_check_address() and return -EAGAIN if it fails. The caller
should notice this error code and retry.

Note: write_opcode() asks for the cleanups, I'll try to do this
in a separate patch.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154328.GA9571@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 41 +++++++++++++----------------------------
 1 file changed, 13 insertions(+), 28 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 604930bf9c92..3ccdb29ee8d6 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -129,33 +129,17 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
 static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep;
-	spinlock_t *ptl;
 	unsigned long addr;
-	int err = -EFAULT;
+	spinlock_t *ptl;
+	pte_t *ptep;
 
 	addr = page_address_in_vma(page, vma);
 	if (addr == -EFAULT)
-		goto out;
-
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
-
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
+		return -EFAULT;
 
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
-
-	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	ptep = page_check_address(page, mm, addr, &ptl, 0);
 	if (!ptep)
-		goto out;
+		return -EAGAIN;
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr);
@@ -174,10 +158,8 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct
 		try_to_free_swap(page);
 	put_page(page);
 	pte_unmap_unlock(ptep, ptl);
-	err = 0;
 
-out:
-	return err;
+	return 0;
 }
 
 /**
@@ -222,9 +204,10 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	void *vaddr_old, *vaddr_new;
 	struct vm_area_struct *vma;
 	struct uprobe *uprobe;
+	unsigned long pgoff;
 	loff_t addr;
 	int ret;
-
+retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
 	if (ret <= 0)
@@ -269,9 +252,9 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
 
 	/* poke the new insn in, ASSUMES we don't cross page boundary */
-	vaddr &= ~PAGE_MASK;
-	BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
-	memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
+	pgoff = (vaddr & ~PAGE_MASK);
+	BUG_ON(pgoff + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
+	memcpy(vaddr_new + pgoff, &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	kunmap_atomic(vaddr_new);
 	kunmap_atomic(vaddr_old);
@@ -291,6 +274,8 @@ unlock_out:
 put_out:
 	put_page(old_page);
 
+	if (unlikely(ret == -EAGAIN))
+		goto retry;
 	return ret;
 }
 
-- 
cgit v1.2.3


From c1914a0936f79ed0236f670122e06e36e4d332ee Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:31 +0200
Subject: uprobes: Install_breakpoint() should fail if is_swbp_insn() == T

install_breakpoint() returns -EEXIST if is_swbp_insn(orig_insn)
== T, the caller treats this code as success.

This is doubly wrong. The successful return should set
UPROBE_COPY_INSN, but the real problem is that it shouldn't
succeed. If the probed insn is int3 the application should get
SIGTRAP, this won't happen with uprobe.

Probably we can fix this, we can add the UPROBE_SHARED_BP flag
and teach handle_swbp/set_orig_insn to handle this case
correctly. But this needs some complications and we have other
insns which can't be probed, lets make a simple fix for now.

I think this needs a cleanup. UPROBE_COPY_INSN should die,
copy_insn() should be called by alloc_uprobe().
arch_uprobe_analyze_insn() depends on ->mm (ia32_compat) but it
is called only once.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154331.GA9578@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 3ccdb29ee8d6..ec78152e32e9 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -693,7 +693,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 			return ret;
 
 		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
-			return -EEXIST;
+			return -ENOTSUPP;
 
 		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, addr);
 		if (ret)
-- 
cgit v1.2.3


From 268720903f87e0b84b161626c4447b81671b5d18 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:33 +0200
Subject: uprobes: Rework register_for_each_vma() to make it O(n)

Currently register_for_each_vma() is O(n ** 2) + O(n ** 3),
every time find_next_vma_info() "restarts" the
vma_prio_tree_foreach() loop and each iteration rechecks the
whole try_list. This also means that try_list can grow
"indefinitely" if register/unregister races with munmap/mmap
activity even if the number of mapping is bounded at any time.

With this patch register_for_each_vma() builds the list of
mm/vaddr structures only once and does install_breakpoint() for
each entry.

We do not care about the new mappings which can be created after
build_map_info() drops mapping->i_mmap_mutex, uprobe_mmap()
should do its work.

Note that we do not allocate map_info under i_mmap_mutex, this
can deadlock with page reclaim (but see the next patch). So we
use 2 lists, "curr" which we are going to return, and "prev"
which holds the already allocated memory. The main loop deques
the entry from "prev" (initially it is empty), and if "prev"
becomes empty again it counts the number of entries we need to
pre-allocate outside of i_mmap_mutex.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Link: http://lkml.kernel.org/r/20120615154333.GA9581@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 199 +++++++++++++++++++++---------------------------
 1 file changed, 86 insertions(+), 113 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ec78152e32e9..4e0db3496d70 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -60,17 +60,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
  */
 static atomic_t uprobe_events = ATOMIC_INIT(0);
 
-/*
- * Maintain a temporary per vma info that can be used to search if a vma
- * has already been handled. This structure is introduced since extending
- * vm_area_struct wasnt recommended.
- */
-struct vma_info {
-	struct list_head	probe_list;
-	struct mm_struct	*mm;
-	loff_t			vaddr;
-};
-
 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
 	atomic_t		ref;
@@ -742,139 +731,123 @@ static void delete_uprobe(struct uprobe *uprobe)
 	atomic_dec(&uprobe_events);
 }
 
-static struct vma_info *
-__find_next_vma_info(struct address_space *mapping, struct list_head *head,
-			struct vma_info *vi, loff_t offset, bool is_register)
+struct map_info {
+	struct map_info *next;
+	struct mm_struct *mm;
+	loff_t vaddr;
+};
+
+static inline struct map_info *free_map_info(struct map_info *info)
 {
+	struct map_info *next = info->next;
+	kfree(info);
+	return next;
+}
+
+static struct map_info *
+build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
+{
+	unsigned long pgoff = offset >> PAGE_SHIFT;
 	struct prio_tree_iter iter;
 	struct vm_area_struct *vma;
-	struct vma_info *tmpvi;
-	unsigned long pgoff;
-	int existing_vma;
-	loff_t vaddr;
-
-	pgoff = offset >> PAGE_SHIFT;
+	struct map_info *curr = NULL;
+	struct map_info *prev = NULL;
+	struct map_info *info;
+	int more = 0;
 
+ again:
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		if (!valid_vma(vma, is_register))
 			continue;
 
-		existing_vma = 0;
-		vaddr = vma_address(vma, offset);
-
-		list_for_each_entry(tmpvi, head, probe_list) {
-			if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
-				existing_vma = 1;
-				break;
-			}
-		}
-
-		/*
-		 * Another vma needs a probe to be installed. However skip
-		 * installing the probe if the vma is about to be unlinked.
-		 */
-		if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
-			vi->mm = vma->vm_mm;
-			vi->vaddr = vaddr;
-			list_add(&vi->probe_list, head);
-
-			return vi;
+		if (!prev) {
+			more++;
+			continue;
 		}
-	}
-
-	return NULL;
-}
 
-/*
- * Iterate in the rmap prio tree  and find a vma where a probe has not
- * yet been inserted.
- */
-static struct vma_info *
-find_next_vma_info(struct address_space *mapping, struct list_head *head,
-		loff_t offset, bool is_register)
-{
-	struct vma_info *vi, *retvi;
+		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
+			continue;
 
-	vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
-	if (!vi)
-		return ERR_PTR(-ENOMEM);
+		info = prev;
+		prev = prev->next;
+		info->next = curr;
+		curr = info;
 
-	mutex_lock(&mapping->i_mmap_mutex);
-	retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
+		info->mm = vma->vm_mm;
+		info->vaddr = vma_address(vma, offset);
+	}
 	mutex_unlock(&mapping->i_mmap_mutex);
 
-	if (!retvi)
-		kfree(vi);
+	if (!more)
+		goto out;
+
+	prev = curr;
+	while (curr) {
+		mmput(curr->mm);
+		curr = curr->next;
+	}
 
-	return retvi;
+	do {
+		info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
+		if (!info) {
+			curr = ERR_PTR(-ENOMEM);
+			goto out;
+		}
+		info->next = prev;
+		prev = info;
+	} while (--more);
+
+	goto again;
+ out:
+	while (prev)
+		prev = free_map_info(prev);
+	return curr;
 }
 
 static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 {
-	struct list_head try_list;
-	struct vm_area_struct *vma;
-	struct address_space *mapping;
-	struct vma_info *vi, *tmpvi;
-	struct mm_struct *mm;
-	loff_t vaddr;
-	int ret;
+	struct map_info *info;
+	int err = 0;
 
-	mapping = uprobe->inode->i_mapping;
-	INIT_LIST_HEAD(&try_list);
-
-	ret = 0;
+	info = build_map_info(uprobe->inode->i_mapping,
+					uprobe->offset, is_register);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
 
-	for (;;) {
-		vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
-		if (!vi)
-			break;
+	while (info) {
+		struct mm_struct *mm = info->mm;
+		struct vm_area_struct *vma;
+		loff_t vaddr;
 
-		if (IS_ERR(vi)) {
-			ret = PTR_ERR(vi);
-			break;
-		}
+		if (err)
+			goto free;
 
-		mm = vi->mm;
 		down_write(&mm->mmap_sem);
-		vma = find_vma(mm, (unsigned long)vi->vaddr);
-		if (!vma || !valid_vma(vma, is_register)) {
-			list_del(&vi->probe_list);
-			kfree(vi);
-			up_write(&mm->mmap_sem);
-			mmput(mm);
-			continue;
-		}
+		vma = find_vma(mm, (unsigned long)info->vaddr);
+		if (!vma || !valid_vma(vma, is_register))
+			goto unlock;
+
 		vaddr = vma_address(vma, uprobe->offset);
 		if (vma->vm_file->f_mapping->host != uprobe->inode ||
-						vaddr != vi->vaddr) {
-			list_del(&vi->probe_list);
-			kfree(vi);
-			up_write(&mm->mmap_sem);
-			mmput(mm);
-			continue;
-		}
+						vaddr != info->vaddr)
+			goto unlock;
 
-		if (is_register)
-			ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
-		else
-			remove_breakpoint(uprobe, mm, vi->vaddr);
-
-		up_write(&mm->mmap_sem);
-		mmput(mm);
 		if (is_register) {
-			if (ret && ret == -EEXIST)
-				ret = 0;
-			if (ret)
-				break;
+			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
+			if (err == -EEXIST)
+				err = 0;
+		} else {
+			remove_breakpoint(uprobe, mm, info->vaddr);
 		}
+ unlock:
+		up_write(&mm->mmap_sem);
+ free:
+		mmput(mm);
+		info = free_map_info(info);
 	}
 
-	list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
-		list_del(&vi->probe_list);
-		kfree(vi);
-	}
-
-	return ret;
+	return err;
 }
 
 static int __uprobe_register(struct uprobe *uprobe)
-- 
cgit v1.2.3


From 7a5bfb66b07f22d2429db776da7bb8b57bfb5cff Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:36 +0200
Subject: uprobes: Change build_map_info() to try kmalloc(GFP_NOWAIT) first

build_map_info() doesn't allocate the memory under i_mmap_mutex
to avoid the deadlock with page reclaim. But it can try
GFP_NOWAIT first, it should work in the likely case and thus we
almost never need the pre-alloc-and-retry path.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Link: http://lkml.kernel.org/r/20120615154336.GA9588@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 4e0db3496d70..897417dbca8e 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -761,6 +761,16 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 		if (!valid_vma(vma, is_register))
 			continue;
 
+		if (!prev && !more) {
+			/*
+			 * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
+			 * reclaim. This is optimistic, no harm done if it fails.
+			 */
+			prev = kmalloc(sizeof(struct map_info),
+					GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			if (prev)
+				prev->next = NULL;
+		}
 		if (!prev) {
 			more++;
 			continue;
-- 
cgit v1.2.3


From c5784de2b351fe871bb57487878f7fc7ec5b075c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 Jun 2012 17:43:39 +0200
Subject: uprobes: Document uprobe_register() vs uprobe_mmap() race

Because the mind is treacherous and makes us forget we need to
write stuff down.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20120615154339.GA9591@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 897417dbca8e..2671d9ad49be 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -44,6 +44,23 @@ static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
 
 #define UPROBES_HASH_SZ	13
 
+/*
+ * We need separate register/unregister and mmap/munmap lock hashes because
+ * of mmap_sem nesting.
+ *
+ * uprobe_register() needs to install probes on (potentially) all processes
+ * and thus needs to acquire multiple mmap_sems (consequtively, not
+ * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
+ * for the particular process doing the mmap.
+ *
+ * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
+ * because of lock order against i_mmap_mutex. This means there's a hole in
+ * the register vma iteration where a mmap() can happen.
+ *
+ * Thus uprobe_register() can race with uprobe_mmap() and we can try and
+ * install a probe where one is already installed.
+ */
+
 /* serialize (un)register */
 static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
 
@@ -339,7 +356,9 @@ out:
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
 	int result;
-
+	/*
+	 * See the comment near uprobes_hash().
+	 */
 	result = is_swbp_at_addr(mm, vaddr);
 	if (result == 1)
 		return -EEXIST;
@@ -845,6 +864,10 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 
 		if (is_register) {
 			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
+			/*
+			 * We can race against uprobe_mmap(), see the
+			 * comment near uprobe_hash().
+			 */
 			if (err == -EEXIST)
 				err = 0;
 		} else {
@@ -1054,8 +1077,10 @@ int uprobe_mmap(struct vm_area_struct *vma)
 			}
 
 			ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
-
-			/* Ignore double add: */
+			/*
+			 * We can race against uprobe_register(), see the
+			 * comment near uprobe_hash().
+			 */
 			if (ret == -EEXIST) {
 				ret = 0;
 
-- 
cgit v1.2.3


From d436615e60c386095dac4a9bf72b08868d2a7564 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:42 +0200
Subject: uprobes: Copy_insn() shouldn't depend on mm/vma/vaddr

1. copy_insn() doesn't need "addr", it can use uprobe->offset.
   Remove this argument.

2. Change copy_insn/__copy_insn to accept "struct file*" instead
   of vma.

copy_insn() is called only once and mm/vma/vaddr are random, it
shouldn't depend on them.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154342.GA9598@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2671d9ad49be..08ef566da763 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -591,10 +591,9 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
 }
 
 static int
-__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
+__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
 			unsigned long nbytes, unsigned long offset)
 {
-	struct file *filp = vma->vm_file;
 	struct page *page;
 	void *vaddr;
 	unsigned long off1;
@@ -625,15 +624,13 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *ins
 	return 0;
 }
 
-static int
-copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
+static int copy_insn(struct uprobe *uprobe, struct file *filp)
 {
 	struct address_space *mapping;
 	unsigned long nbytes;
 	int bytes;
 
-	addr &= ~PAGE_MASK;
-	nbytes = PAGE_SIZE - addr;
+	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
 	mapping = uprobe->inode->i_mapping;
 
 	/* Instruction at end of binary; copy only available bytes */
@@ -644,13 +641,13 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
 
 	/* Instruction at the page-boundary; copy bytes in second page */
 	if (nbytes < bytes) {
-		if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes,
+		if (__copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
 				bytes - nbytes, uprobe->offset + nbytes))
 			return -ENOMEM;
 
 		bytes = nbytes;
 	}
-	return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
+	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
 }
 
 /*
@@ -696,7 +693,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	addr = (unsigned long)vaddr;
 
 	if (!(uprobe->flags & UPROBE_COPY_INSN)) {
-		ret = copy_insn(uprobe, vma, addr);
+		ret = copy_insn(uprobe, vma->vm_file);
 		if (ret)
 			return ret;
 
-- 
cgit v1.2.3


From fc36f59565861af2e897225bc3782479a26c5d5a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:44 +0200
Subject: uprobes: Copy_insn() should not return -ENOMEM if __copy_insn() fails

copy_insn() returns -ENOMEM if the first __copy_insn() fails,
it should return the correct error code.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154344.GA9601@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 08ef566da763..2db1d94d7dfc 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -641,10 +641,10 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp)
 
 	/* Instruction at the page-boundary; copy bytes in second page */
 	if (nbytes < bytes) {
-		if (__copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
-				bytes - nbytes, uprobe->offset + nbytes))
-			return -ENOMEM;
-
+		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
+				bytes - nbytes, uprobe->offset + nbytes);
+		if (err)
+			return err;
 		bytes = nbytes;
 	}
 	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
-- 
cgit v1.2.3


From eb2bf57bee42c7565032f93adaa211e2c9fcc52c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:47 +0200
Subject: uprobes: No need to re-check vma_address() in write_opcode()

write_opcode() is called by register_for_each_vma() and
uprobe_mmap() paths. In both cases the caller has already
verified this vaddr under mmap_sem, no need to re-check.

Note also that this check is wrong anyway, we should not
truncate loff_t returned by vma_address() if we do not trust
this mapping.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154347.GA9604@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2db1d94d7dfc..14c71a2aadad 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -211,7 +211,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	struct vm_area_struct *vma;
 	struct uprobe *uprobe;
 	unsigned long pgoff;
-	loff_t addr;
 	int ret;
 retry:
 	/* Read the page with vaddr into memory */
@@ -235,10 +234,6 @@ retry:
 	if (mapping != vma->vm_file->f_mapping)
 		goto put_out;
 
-	addr = vma_address(vma, uprobe->offset);
-	if (vaddr != (unsigned long)addr)
-		goto put_out;
-
 	ret = -ENOMEM;
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
 	if (!new_page)
-- 
cgit v1.2.3


From d9c4a30e82614d43b55893a73f31e7284007ce82 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:50 +0200
Subject: uprobes: Move BUG_ON(UPROBE_SWBP_INSN_SIZE) from write_opcode() to
 install_breakpoint()

write_opcode() ensures that UPROBE_SWBP_INSN doesn't cross the
page boundary. This looks a bit confusing, the check does not
depend on vaddr and it is enough to do it only once right after
install_breakpoint()->arch_uprobe_analyze_insn().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154350.GA9611@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 14c71a2aadad..b9c61bda9029 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -210,7 +210,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	void *vaddr_old, *vaddr_new;
 	struct vm_area_struct *vma;
 	struct uprobe *uprobe;
-	unsigned long pgoff;
 	int ret;
 retry:
 	/* Read the page with vaddr into memory */
@@ -251,11 +250,7 @@ retry:
 	vaddr_new = kmap_atomic(new_page);
 
 	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
-
-	/* poke the new insn in, ASSUMES we don't cross page boundary */
-	pgoff = (vaddr & ~PAGE_MASK);
-	BUG_ON(pgoff + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
-	memcpy(vaddr_new + pgoff, &opcode, UPROBE_SWBP_INSN_SIZE);
+	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	kunmap_atomic(vaddr_new);
 	kunmap_atomic(vaddr_old);
@@ -699,6 +694,10 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 		if (ret)
 			return ret;
 
+		/* write_opcode() assumes we don't cross page boundary */
+		BUG_ON((uprobe->offset & ~PAGE_MASK) +
+				UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
+
 		uprobe->flags |= UPROBE_COPY_INSN;
 	}
 
-- 
cgit v1.2.3


From 449d0d7c9fb87277175db34c009c70cb348004a8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:53 +0200
Subject: uprobes: Simplify the usage of uprobe->pending_list

uprobe->pending_list is only used to create the temporary list,
it has no meaning after we drop uprobes_mmap_hash(inode).

No need to initialize this node or remove it from tmp_list, and
we can use list_for_each_entry().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20120615154353.GA9614@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b9c61bda9029..7d5c78f063ae 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -513,7 +513,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	uprobe->inode = igrab(inode);
 	uprobe->offset = offset;
 	init_rwsem(&uprobe->consumer_rwsem);
-	INIT_LIST_HEAD(&uprobe->pending_list);
 
 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
@@ -1037,7 +1036,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head)
 int uprobe_mmap(struct vm_area_struct *vma)
 {
 	struct list_head tmp_list;
-	struct uprobe *uprobe, *u;
+	struct uprobe *uprobe;
 	struct inode *inode;
 	int ret, count;
 
@@ -1055,10 +1054,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	ret = 0;
 	count = 0;
 
-	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
+	list_for_each_entry(uprobe, &tmp_list, pending_list) {
 		loff_t vaddr;
 
-		list_del(&uprobe->pending_list);
 		if (!ret) {
 			vaddr = vma_address(vma, uprobe->offset);
 
@@ -1106,7 +1104,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
 	struct list_head tmp_list;
-	struct uprobe *uprobe, *u;
+	struct uprobe *uprobe;
 	struct inode *inode;
 
 	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
@@ -1123,12 +1121,10 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 	mutex_lock(uprobes_mmap_hash(inode));
 	build_probe_list(inode, &tmp_list);
 
-	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
+	list_for_each_entry(uprobe, &tmp_list, pending_list) {
 		loff_t vaddr;
 
-		list_del(&uprobe->pending_list);
 		vaddr = vma_address(vma, uprobe->offset);
-
 		if (vaddr >= start && vaddr < end) {
 			/*
 			 * An unregister could have removed the probe before
-- 
cgit v1.2.3


From 816c03fbabe64fa09f66fbb64e932081af381415 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:55 +0200
Subject: uprobes: Don't use loff_t for the valid virtual address

loff_t looks confusing when it is used for the virtual address.
Change map_info and install_breakpoint/remove_breakpoint paths
to use "unsigned long".

The patch doesn't change vma_address(), it can't return "long"
because it is used to verify the mapping. But probably this
needs some cleanups too.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Anton Arapov <anton@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154355.GA9622@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 7d5c78f063ae..4df84b76dd48 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -664,9 +664,8 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp)
  */
 static int
 install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
-			struct vm_area_struct *vma, loff_t vaddr)
+			struct vm_area_struct *vma, unsigned long vaddr)
 {
-	unsigned long addr;
 	int ret;
 
 	/*
@@ -679,8 +678,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	if (!uprobe->consumers)
 		return -EEXIST;
 
-	addr = (unsigned long)vaddr;
-
 	if (!(uprobe->flags & UPROBE_COPY_INSN)) {
 		ret = copy_insn(uprobe, vma->vm_file);
 		if (ret)
@@ -689,7 +686,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
 			return -ENOTSUPP;
 
-		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, addr);
+		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
 		if (ret)
 			return ret;
 
@@ -709,7 +706,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	 * Hence increment before and decrement on failure.
 	 */
 	atomic_inc(&mm->uprobes_state.count);
-	ret = set_swbp(&uprobe->arch, mm, addr);
+	ret = set_swbp(&uprobe->arch, mm, vaddr);
 	if (ret)
 		atomic_dec(&mm->uprobes_state.count);
 
@@ -717,9 +714,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 }
 
 static void
-remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
+remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true))
+	if (!set_orig_insn(&uprobe->arch, mm, vaddr, true))
 		atomic_dec(&mm->uprobes_state.count);
 }
 
@@ -743,7 +740,7 @@ static void delete_uprobe(struct uprobe *uprobe)
 struct map_info {
 	struct map_info *next;
 	struct mm_struct *mm;
-	loff_t vaddr;
+	unsigned long vaddr;
 };
 
 static inline struct map_info *free_map_info(struct map_info *info)
@@ -837,7 +834,6 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 	while (info) {
 		struct mm_struct *mm = info->mm;
 		struct vm_area_struct *vma;
-		loff_t vaddr;
 
 		if (err)
 			goto free;
@@ -847,9 +843,8 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 		if (!vma || !valid_vma(vma, is_register))
 			goto unlock;
 
-		vaddr = vma_address(vma, uprobe->offset);
 		if (vma->vm_file->f_mapping->host != uprobe->inode ||
-						vaddr != info->vaddr)
+		    vma_address(vma, uprobe->offset) != info->vaddr)
 			goto unlock;
 
 		if (is_register) {
@@ -1055,10 +1050,8 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	count = 0;
 
 	list_for_each_entry(uprobe, &tmp_list, pending_list) {
-		loff_t vaddr;
-
 		if (!ret) {
-			vaddr = vma_address(vma, uprobe->offset);
+			loff_t vaddr = vma_address(vma, uprobe->offset);
 
 			if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
 				put_uprobe(uprobe);
@@ -1122,9 +1115,8 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 	build_probe_list(inode, &tmp_list);
 
 	list_for_each_entry(uprobe, &tmp_list, pending_list) {
-		loff_t vaddr;
+		loff_t vaddr = vma_address(vma, uprobe->offset);
 
-		vaddr = vma_address(vma, uprobe->offset);
 		if (vaddr >= start && vaddr < end) {
 			/*
 			 * An unregister could have removed the probe before
-- 
cgit v1.2.3


From 593609a59600c8377f311b300f14deacb155b9a4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:43:59 +0200
Subject: uprobes: __copy_insn() needs "loff_t offset"

1. __copy_insn() needs "loff_t offset", not "unsigned long",
   to read the file.

2. use pgoff_t for "idx" and remove the unnecessary typecast.

3. fix the typo, "&=" is not what we want

4. can't resist, rename off1 to off.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154359.GA9625@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 4df84b76dd48..d1b2eeb80837 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -581,12 +581,12 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
 
 static int
 __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
-			unsigned long nbytes, unsigned long offset)
+			unsigned long nbytes, loff_t offset)
 {
 	struct page *page;
 	void *vaddr;
-	unsigned long off1;
-	unsigned long idx;
+	unsigned long off;
+	pgoff_t idx;
 
 	if (!filp)
 		return -EINVAL;
@@ -594,8 +594,8 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
 	if (!mapping->a_ops->readpage)
 		return -EIO;
 
-	idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
-	off1 = offset &= ~PAGE_MASK;
+	idx = offset >> PAGE_CACHE_SHIFT;
+	off = offset & ~PAGE_MASK;
 
 	/*
 	 * Ensure that the page that has the original instruction is
@@ -606,7 +606,7 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
 		return PTR_ERR(page);
 
 	vaddr = kmap_atomic(page);
-	memcpy(insn, vaddr + off1, nbytes);
+	memcpy(insn, vaddr + off, nbytes);
 	kunmap_atomic(vaddr);
 	page_cache_release(page);
 
-- 
cgit v1.2.3


From e227051b13956b8f71c0abecc41ad351e64671c8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 15 Jun 2012 17:44:01 +0200
Subject: uprobes: Remove the unnecessary initialization in add_utask()

Trivial cleanup. No need to nullify ->active_uprobe after
kzalloc().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20120615154401.GA9633@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index d1b2eeb80837..f93532748bca 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1392,7 +1392,6 @@ static struct uprobe_task *add_utask(void)
 	if (unlikely(!utask))
 		return NULL;
 
-	utask->active_uprobe = NULL;
 	current->utask = utask;
 	return utask;
 }
-- 
cgit v1.2.3


From 650513979a437c32d7a0a84f0ed952a55bbb5583 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Sat, 16 Jun 2012 21:47:37 -0700
Subject: x86-64, reboot: Allow reboot=bios and reboot-cpu override on x86-64

With the revamped realmode trampoline code, it is trivial to extend
support for reboot=bios to x86-64.  Furthermore, while we are at it,
remove the restriction that only we can only override the reboot CPU
on 32 bits.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Link: http://lkml.kernel.org/n/tip-jopx7y6g6dbcx4tpal8q0jlr@git.kernel.org
---
 arch/x86/include/asm/emergency-restart.h |   2 -
 arch/x86/include/asm/realmode.h          |   3 +-
 arch/x86/include/asm/reboot.h            |   4 +-
 arch/x86/kernel/reboot.c                 |  52 +++++------
 arch/x86/realmode/rm/Makefile            |   2 +-
 arch/x86/realmode/rm/header.S            |   4 +-
 arch/x86/realmode/rm/reboot.S            | 152 +++++++++++++++++++++++++++++++
 arch/x86/realmode/rm/reboot_32.S         | 132 ---------------------------
 8 files changed, 184 insertions(+), 167 deletions(-)
 create mode 100644 arch/x86/realmode/rm/reboot.S
 delete mode 100644 arch/x86/realmode/rm/reboot_32.S

diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index cc70c1c78ca4..75ce3f47d204 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -4,9 +4,7 @@
 enum reboot_type {
 	BOOT_TRIPLE = 't',
 	BOOT_KBD = 'k',
-#ifdef CONFIG_X86_32
 	BOOT_BIOS = 'b',
-#endif
 	BOOT_ACPI = 'a',
 	BOOT_EFI = 'e',
 	BOOT_CF9 = 'p',
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index fce3f4ae5bd6..fe1ec5bcd846 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -21,8 +21,9 @@ struct real_mode_header {
 	u32	wakeup_header;
 #endif
 	/* APM/BIOS reboot */
-#ifdef CONFIG_X86_32
 	u32	machine_real_restart_asm;
+#ifdef CONFIG_X86_64
+	u32	machine_real_restart_seg;
 #endif
 };
 
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 92f297069e87..a82c4f1b4d83 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -18,8 +18,8 @@ extern struct machine_ops machine_ops;
 
 void native_machine_crash_shutdown(struct pt_regs *regs);
 void native_machine_shutdown(void);
-void machine_real_restart(unsigned int type);
-/* These must match dispatch_table in reboot_32.S */
+void __noreturn machine_real_restart(unsigned int type);
+/* These must match dispatch in arch/x86/realmore/rm/reboot.S */
 #define MRR_BIOS	0
 #define MRR_APM		1
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 25b48edb847c..6ddb9cd0ced0 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -20,14 +20,12 @@
 #include <asm/virtext.h>
 #include <asm/cpu.h>
 #include <asm/nmi.h>
+#include <asm/smp.h>
 
-#ifdef CONFIG_X86_32
-# include <linux/ctype.h>
-# include <linux/mc146818rtc.h>
-# include <asm/realmode.h>
-#else
-# include <asm/x86_init.h>
-#endif
+#include <linux/ctype.h>
+#include <linux/mc146818rtc.h>
+#include <asm/realmode.h>
+#include <asm/x86_init.h>
 
 /*
  * Power off function, if any
@@ -49,7 +47,7 @@ int reboot_force;
  */
 static int reboot_default = 1;
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 static int reboot_cpu = -1;
 #endif
 
@@ -67,8 +65,8 @@ bool port_cf9_safe = false;
  * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
  * warm   Don't set the cold reboot flag
  * cold   Set the cold reboot flag
- * bios   Reboot by jumping through the BIOS (only for X86_32)
- * smp    Reboot by executing reset on BSP or other CPU (only for X86_32)
+ * bios   Reboot by jumping through the BIOS
+ * smp    Reboot by executing reset on BSP or other CPU
  * triple Force a triple fault (init)
  * kbd    Use the keyboard controller. cold reset (default)
  * acpi   Use the RESET_REG in the FADT
@@ -95,7 +93,6 @@ static int __init reboot_setup(char *str)
 			reboot_mode = 0;
 			break;
 
-#ifdef CONFIG_X86_32
 #ifdef CONFIG_SMP
 		case 's':
 			if (isdigit(*(str+1))) {
@@ -112,7 +109,6 @@ static int __init reboot_setup(char *str)
 #endif /* CONFIG_SMP */
 
 		case 'b':
-#endif
 		case 'a':
 		case 'k':
 		case 't':
@@ -138,7 +134,6 @@ static int __init reboot_setup(char *str)
 __setup("reboot=", reboot_setup);
 
 
-#ifdef CONFIG_X86_32
 /*
  * Reboot options and system auto-detection code provided by
  * Dell Inc. so their systems "just work". :-)
@@ -157,11 +152,8 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
 	return 0;
 }
 
-void machine_real_restart(unsigned int type)
+void __noreturn machine_real_restart(unsigned int type)
 {
-	void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int))
-		real_mode_header->machine_real_restart_asm;
-
 	local_irq_disable();
 
 	/*
@@ -181,7 +173,11 @@ void machine_real_restart(unsigned int type)
 	/*
 	 * Switch back to the initial page table.
 	 */
+#ifdef CONFIG_X86_32
 	load_cr3(initial_page_table);
+#else
+	write_cr3(real_mode_header->trampoline_pgd);
+#endif
 
 	/*
 	 * Write 0x1234 to absolute memory location 0x472.  The BIOS reads
@@ -192,14 +188,21 @@ void machine_real_restart(unsigned int type)
 	*((unsigned short *)0x472) = reboot_mode;
 
 	/* Jump to the identity-mapped low memory code */
-	restart_lowmem(type);
+#ifdef CONFIG_X86_32
+	asm volatile("jmpl *%0" : :
+		     "rm" (real_mode_header->machine_real_restart_asm),
+		     "a" (type));
+#else
+	asm volatile("ljmpl *%0" : :
+		     "m" (real_mode_header->machine_real_restart_asm),
+		     "D" (type));
+#endif
+	unreachable();
 }
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(machine_real_restart);
 #endif
 
-#endif /* CONFIG_X86_32 */
-
 /*
  * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
  */
@@ -223,11 +226,9 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
 }
 
 /*
- * This is a single dmi_table handling all reboot quirks.  Note that
- * REBOOT_BIOS is only available for 32bit
+ * This is a single dmi_table handling all reboot quirks.
  */
 static struct dmi_system_id __initdata reboot_dmi_table[] = {
-#ifdef CONFIG_X86_32
 	{	/* Handle problems with rebooting on Dell E520's */
 		.callback = set_bios_reboot,
 		.ident = "Dell E520",
@@ -377,7 +378,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
 		},
 	},
-#endif /* CONFIG_X86_32 */
 
 	{	/* Handle reboot issue on Acer Aspire one */
 		.callback = set_kbd_reboot,
@@ -576,13 +576,11 @@ static void native_machine_emergency_restart(void)
 			reboot_type = BOOT_KBD;
 			break;
 
-#ifdef CONFIG_X86_32
 		case BOOT_BIOS:
 			machine_real_restart(MRR_BIOS);
 
 			reboot_type = BOOT_KBD;
 			break;
-#endif
 
 		case BOOT_ACPI:
 			acpi_reboot();
@@ -624,12 +622,10 @@ void native_machine_shutdown(void)
 	/* The boot cpu is always logical cpu 0 */
 	int reboot_cpu_id = 0;
 
-#ifdef CONFIG_X86_32
 	/* See if there has been given a command line override */
 	if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
 		cpu_online(reboot_cpu))
 		reboot_cpu_id = reboot_cpu;
-#endif
 
 	/* Make certain the cpu I'm about to reboot on is online */
 	if (!cpu_online(reboot_cpu_id))
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 5b84a2d30888..b2d534cab25f 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -22,7 +22,7 @@ wakeup-objs	+= video-bios.o
 realmode-y			+= header.o
 realmode-y			+= trampoline_$(BITS).o
 realmode-y			+= stack.o
-realmode-$(CONFIG_X86_32)	+= reboot_32.o
+realmode-y			+= reboot.o
 realmode-$(CONFIG_ACPI_SLEEP)	+= $(wakeup-objs)
 
 targets	+= $(realmode-y)
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
index fadf48378ada..a28221d94e69 100644
--- a/arch/x86/realmode/rm/header.S
+++ b/arch/x86/realmode/rm/header.S
@@ -6,6 +6,7 @@
 
 #include <linux/linkage.h>
 #include <asm/page_types.h>
+#include <asm/segment.h>
 
 #include "realmode.h"
 	
@@ -28,8 +29,9 @@ GLOBAL(real_mode_header)
 	.long	pa_wakeup_header
 #endif
 	/* APM/BIOS reboot */
-#ifdef CONFIG_X86_32
 	.long	pa_machine_real_restart_asm
+#ifdef CONFIG_X86_64
+	.long	__KERNEL32_CS
 #endif
 END(real_mode_header)
 
diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S
new file mode 100644
index 000000000000..6bf8feac5557
--- /dev/null
+++ b/arch/x86/realmode/rm/reboot.S
@@ -0,0 +1,152 @@
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/segment.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/msr-index.h>
+#include "realmode.h"
+
+/*
+ * The following code and data reboots the machine by switching to real
+ * mode and jumping to the BIOS reset entry point, as if the CPU has
+ * really been reset.  The previous version asked the keyboard
+ * controller to pulse the CPU reset line, which is more thorough, but
+ * doesn't work with at least one type of 486 motherboard.  It is easy
+ * to stop this code working; hence the copious comments.
+ *
+ * This code is called with the restart type (0 = BIOS, 1 = APM) in
+ * the primary argument register (%eax for 32 bit, %edi for 64 bit).
+ */
+	.section ".text32", "ax"
+	.code32
+ENTRY(machine_real_restart_asm)
+
+#ifdef CONFIG_X86_64
+
+	/* Disable paging to drop us out of long mode */
+	movl	%cr0, %eax
+	andl	$~X86_CR0_PG, %eax
+	movl	%eax, %cr0
+	jmp	1f	/* "A branch" may be needed here, assume near is OK */
+
+1:
+	xorl	%eax, %eax
+	xorl	%edx, %edx
+	movl	$MSR_EFER, %ecx
+	wrmsr
+
+	movl	%edi, %eax
+	
+#endif /* CONFIG_X86_64 */
+	
+	/* Set up the IDT for real mode. */
+	lidtl	pa_machine_real_restart_idt
+
+	/*
+	 * Set up a GDT from which we can load segment descriptors for real
+	 * mode.  The GDT is not used in real mode; it is just needed here to
+	 * prepare the descriptors.
+	 */
+	lgdtl	pa_machine_real_restart_gdt
+
+	/*
+	 * Load the data segment registers with 16-bit compatible values
+	 */
+	movl	$16, %ecx
+	movl	%ecx, %ds
+	movl	%ecx, %es
+	movl	%ecx, %fs
+	movl	%ecx, %gs
+	movl	%ecx, %ss
+	ljmpw	$8, $1f
+
+/*
+ * This is 16-bit protected mode code to disable paging and the cache,
+ * switch to real mode and jump to the BIOS reset code.
+ *
+ * The instruction that switches to real mode by writing to CR0 must be
+ * followed immediately by a far jump instruction, which set CS to a
+ * valid value for real mode, and flushes the prefetch queue to avoid
+ * running instructions that have already been decoded in protected
+ * mode.
+ *
+ * Clears all the flags except ET, especially PG (paging), PE
+ * (protected-mode enable) and TS (task switch for coprocessor state
+ * save).  Flushes the TLB after paging has been disabled.  Sets CD and
+ * NW, to disable the cache on a 486, and invalidates the cache.  This
+ * is more like the state of a 486 after reset.  I don't know if
+ * something else should be done for other chips.
+ *
+ * More could be done here to set up the registers as if a CPU reset had
+ * occurred; hopefully real BIOSs don't assume much.  This is not the
+ * actual BIOS entry point, anyway (that is at 0xfffffff0).
+ *
+ * Most of this work is probably excessive, but it is what is tested.
+ */
+	.text
+	.code16
+
+	.balign	16
+machine_real_restart_asm16:
+1:
+	xorl	%ecx, %ecx
+	movl	%cr0, %edx
+	andl	$0x00000011, %edx
+	orl	$0x60000000, %edx
+	movl	%edx, %cr0
+	movl	%ecx, %cr3
+	movl	%cr0, %edx
+	testl	$0x60000000, %edx	/* If no cache bits -> no wbinvd */
+	jz	2f
+	wbinvd
+2:
+	andb	$0x10, %dl
+	movl	%edx, %cr0
+	LJMPW_RM(3f)
+3:
+	andw	%ax, %ax
+	jz	bios
+
+apm:
+	movw	$0x1000, %ax
+	movw	%ax, %ss
+	movw	$0xf000, %sp
+	movw	$0x5307, %ax
+	movw	$0x0001, %bx
+	movw	$0x0003, %cx
+	int	$0x15
+	/* This should never return... */
+
+bios:
+	ljmpw	$0xf000, $0xfff0
+
+	.section ".rodata", "a"
+
+	.balign	16
+GLOBAL(machine_real_restart_idt)
+	.word	0xffff		/* Length - real mode default value */
+	.long	0		/* Base - real mode default value */
+END(machine_real_restart_idt)
+
+	.balign	16
+GLOBAL(machine_real_restart_gdt)
+	/* Self-pointer */
+	.word	0xffff		/* Length - real mode default value */
+	.long	pa_machine_real_restart_gdt
+	.word	0
+
+	/*
+	 * 16-bit code segment pointing to real_mode_seg
+	 * Selector value 8
+	 */
+	.word	0xffff		/* Limit */
+	.long	0x9b000000 + pa_real_mode_base
+	.word	0
+
+	/*
+	 * 16-bit data segment with the selector value 16 = 0x10 and
+	 * base value 0x100; since this is consistent with real mode
+	 * semantics we don't have to reload the segments once CR0.PE = 0.
+	 */
+	.quad	GDT_ENTRY(0x0093, 0x100, 0xffff)
+END(machine_real_restart_gdt)
diff --git a/arch/x86/realmode/rm/reboot_32.S b/arch/x86/realmode/rm/reboot_32.S
deleted file mode 100644
index 114044876b3d..000000000000
--- a/arch/x86/realmode/rm/reboot_32.S
+++ /dev/null
@@ -1,132 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/segment.h>
-#include <asm/page_types.h>
-#include "realmode.h"
-
-/*
- * The following code and data reboots the machine by switching to real
- * mode and jumping to the BIOS reset entry point, as if the CPU has
- * really been reset.  The previous version asked the keyboard
- * controller to pulse the CPU reset line, which is more thorough, but
- * doesn't work with at least one type of 486 motherboard.  It is easy
- * to stop this code working; hence the copious comments.
- *
- * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax.
- */
-	.section ".text32", "ax"
-	.code32
-
-	.balign	16
-ENTRY(machine_real_restart_asm)
-	/* Set up the IDT for real mode. */
-	lidtl	pa_machine_real_restart_idt
-
-	/*
-	 * Set up a GDT from which we can load segment descriptors for real
-	 * mode.  The GDT is not used in real mode; it is just needed here to
-	 * prepare the descriptors.
-	 */
-	lgdtl	pa_machine_real_restart_gdt
-
-	/*
-	 * Load the data segment registers with 16-bit compatible values
-	 */
-	movl	$16, %ecx
-	movl	%ecx, %ds
-	movl	%ecx, %es
-	movl	%ecx, %fs
-	movl	%ecx, %gs
-	movl	%ecx, %ss
-	ljmpw	$8, $1f
-
-/*
- * This is 16-bit protected mode code to disable paging and the cache,
- * switch to real mode and jump to the BIOS reset code.
- *
- * The instruction that switches to real mode by writing to CR0 must be
- * followed immediately by a far jump instruction, which set CS to a
- * valid value for real mode, and flushes the prefetch queue to avoid
- * running instructions that have already been decoded in protected
- * mode.
- *
- * Clears all the flags except ET, especially PG (paging), PE
- * (protected-mode enable) and TS (task switch for coprocessor state
- * save).  Flushes the TLB after paging has been disabled.  Sets CD and
- * NW, to disable the cache on a 486, and invalidates the cache.  This
- * is more like the state of a 486 after reset.  I don't know if
- * something else should be done for other chips.
- *
- * More could be done here to set up the registers as if a CPU reset had
- * occurred; hopefully real BIOSs don't assume much.  This is not the
- * actual BIOS entry point, anyway (that is at 0xfffffff0).
- *
- * Most of this work is probably excessive, but it is what is tested.
- */
-	.text
-	.code16
-
-	.balign	16
-machine_real_restart_asm16:
-1:
-	xorl	%ecx, %ecx
-	movl	%cr0, %edx
-	andl	$0x00000011, %edx
-	orl	$0x60000000, %edx
-	movl	%edx, %cr0
-	movl	%ecx, %cr3
-	movl	%cr0, %edx
-	testl	$0x60000000, %edx	/* If no cache bits -> no wbinvd */
-	jz	2f
-	wbinvd
-2:
-	andb	$0x10, %dl
-	movl	%edx, %cr0
-	LJMPW_RM(3f)
-3:
-	andw	%ax, %ax
-	jz	bios
-
-apm:
-	movw	$0x1000, %ax
-	movw	%ax, %ss
-	movw	$0xf000, %sp
-	movw	$0x5307, %ax
-	movw	$0x0001, %bx
-	movw	$0x0003, %cx
-	int	$0x15
-	/* This should never return... */
-
-bios:
-	ljmpw	$0xf000, $0xfff0
-
-	.section ".rodata", "a"
-
-	.balign	16
-GLOBAL(machine_real_restart_idt)
-	.word	0xffff		/* Length - real mode default value */
-	.long	0		/* Base - real mode default value */
-END(machine_real_restart_idt)
-
-	.balign	16
-GLOBAL(machine_real_restart_gdt)
-	/* Self-pointer */
-	.word	0xffff		/* Length - real mode default value */
-	.long	pa_machine_real_restart_gdt
-	.word	0
-
-	/*
-	 * 16-bit code segment pointing to real_mode_seg
-	 * Selector value 8
-	 */
-	.word	0xffff		/* Limit */
-	.long	0x9b000000 + pa_real_mode_base
-	.word	0
-
-	/*
-	 * 16-bit data segment with the selector value 16 = 0x10 and
-	 * base value 0x100; since this is consistent with real mode
-	 * semantics we don't have to reload the segments once CR0.PE = 0.
-	 */
-	.quad	GDT_ENTRY(0x0093, 0x100, 0xffff)
-END(machine_real_restart_gdt)
-- 
cgit v1.2.3


From abf71f3066740f3b59c3f731b4b68ed335f7b24d Mon Sep 17 00:00:00 2001
From: Ido Yariv <ido@wizery.com>
Date: Fri, 15 Jun 2012 18:10:55 +0300
Subject: x86/vsmp: Fix vector_allocation_domain's return value

Commit 8637e38a ("x86/apic: Avoid useless scanning thru a
cpumask in assign_irq_vector()") modified
vector_allocation_domain() to return a boolean indicating if
cpumask is dynamic or static. Adjust vSMP's callback
implementation accordingly.

Signed-off-by: Ido Yariv <ido@wizery.com>
Acked-by: Shai Fultheim <shai@scalemp.com>
Cc: Alexander Gordeev <agordeev@redhat.com>
Link: http://lkml.kernel.org/r/1339773055-27397-1-git-send-email-ido@wizery.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/vsmp_64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 3f0285ac00fa..fa5adb7c228c 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -208,9 +208,10 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
  * In vSMP, all cpus should be capable of handling interrupts, regardless of
  * the APIC used.
  */
-static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static bool fill_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	cpumask_setall(retmask);
+	return false;
 }
 
 static void vsmp_apic_post_init(void)
-- 
cgit v1.2.3


From 76958a61e42fb6277a8431eb17e4bdb24176f1b7 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 15 Jun 2012 19:06:44 +0200
Subject: perf/x86/amd: Fix RDPMC index calculation for AMD family 15h

The RDPMC index calculation is wrong for AMD family 15h
(X86_FEATURE_ PERFCTR_CORE set). This leads to a #GP when
accessing the counter:

 Pid: 2237, comm: syslog-ng Not tainted 3.5.0-rc1-perf-x86_64-standard-g130ff90 #135 AMD Pike/Pike
 RIP: 0010:[<ffffffff8100dc33>]  [<ffffffff8100dc33>] x86_perf_event_update+0x27/0x66

While the msr address offset is (index << 1) we must use index to
select the correct rdpmc.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vince Weaver <vweaver1@eecs.utk.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 766c76d5ec4c..d1f38c9509d0 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -823,7 +823,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	} else {
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
-		hwc->event_base_rdpmc = x86_pmu_addr_offset(hwc->idx);
+		hwc->event_base_rdpmc = hwc->idx;
 	}
 }
 
-- 
cgit v1.2.3


From 4b4969b14490a4f65b572b8f180164181104b5e1 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Fri, 15 Jun 2012 14:31:30 +0800
Subject: perf: Export perf_assign_events()

Export perf_assign_events() so the uncore code can use it to
schedule events.

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1339741902-8449-2-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 6 +++---
 arch/x86/kernel/cpu/perf_event.h | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index d1f38c9509d0..6d32aefc9dbd 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -626,7 +626,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	c = sched->constraints[sched->state.event];
 
 	/* Prefer fixed purpose counters */
-	if (x86_pmu.num_counters_fixed) {
+	if (c->idxmsk64 & (~0ULL << X86_PMC_IDX_FIXED)) {
 		idx = X86_PMC_IDX_FIXED;
 		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
 			if (!__test_and_set_bit(idx, sched->state.used))
@@ -693,8 +693,8 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 /*
  * Assign a counter for each event.
  */
-static int perf_assign_events(struct event_constraint **constraints, int n,
-			      int wmin, int wmax, int *assign)
+int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int *assign)
 {
 	struct perf_sched sched;
 
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3df3de9452a9..83238f2a12b2 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -481,6 +481,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 
 void x86_pmu_enable_all(int added);
 
+int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
 
 void x86_pmu_stop(struct perf_event *event, int flags);
-- 
cgit v1.2.3


From fbfc623f8231c8d8c78aab5841e9c6e5811ab638 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Fri, 15 Jun 2012 14:31:31 +0800
Subject: perf: Avoid race between cpu hotplug and installing event

perf_event_open() requires the cpu on which to install event is online,
but the cpu can go offline after perf_event_open checks that. Add a
get_online_cpus()/put_online_cpus() pair to avoid the race.

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1339741902-8449-3-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index d7d71d6ec972..31d182e01549 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6252,6 +6252,8 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
+	get_online_cpus();
+
 	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
 				 NULL, NULL);
 	if (IS_ERR(event)) {
@@ -6391,6 +6393,8 @@ SYSCALL_DEFINE5(perf_event_open,
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
+	put_online_cpus();
+
 	event->owner = current;
 
 	mutex_lock(&current->perf_event_mutex);
@@ -6419,6 +6423,7 @@ err_context:
 err_alloc:
 	free_event(event);
 err_task:
+	put_online_cpus();
 	if (task)
 		put_task_struct(task);
 err_group_fd:
-- 
cgit v1.2.3


From e2d37cd213dcc0aeb3db4b37b9bd1710fe36fbf7 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Fri, 15 Jun 2012 14:31:32 +0800
Subject: perf: Allow the PMU driver to choose the CPU on which to install
 events

Allow the pmu->event_init callback to change event->cpu, so the PMU driver
can choose the CPU on which to install events.

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1339741902-8449-4-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 31d182e01549..fa36a39e8bb7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6306,7 +6306,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	/*
 	 * Get the target context (task or percpu):
 	 */
-	ctx = find_get_context(pmu, task, cpu);
+	ctx = find_get_context(pmu, task, event->cpu);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err_alloc;
@@ -6379,16 +6379,16 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_lock(&ctx->mutex);
 
 	if (move_group) {
-		perf_install_in_context(ctx, group_leader, cpu);
+		perf_install_in_context(ctx, group_leader, event->cpu);
 		get_ctx(ctx);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_install_in_context(ctx, sibling, cpu);
+			perf_install_in_context(ctx, sibling, event->cpu);
 			get_ctx(ctx);
 		}
 	}
 
-	perf_install_in_context(ctx, event, cpu);
+	perf_install_in_context(ctx, event, event->cpu);
 	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
-- 
cgit v1.2.3


From 0cda4c023132aa93f2dd94811061f812e88daf4c Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Fri, 15 Jun 2012 14:31:33 +0800
Subject: perf: Introduce perf_pmu_migrate_context()

Originally from Peter Zijlstra. The helper migrates perf events
from one cpu to another cpu.

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1339741902-8449-5-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h |  2 ++
 kernel/events/core.c       | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1ce887abcc5c..76c5c8b724a7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1107,6 +1107,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				struct task_struct *task,
 				perf_overflow_handler_t callback,
 				void *context);
+extern void perf_pmu_migrate_context(struct pmu *pmu,
+				int src_cpu, int dst_cpu);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fa36a39e8bb7..f1cf0edeb39a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1645,6 +1645,8 @@ perf_install_in_context(struct perf_event_context *ctx,
 	lockdep_assert_held(&ctx->mutex);
 
 	event->ctx = ctx;
+	if (event->cpu != -1)
+		event->cpu = cpu;
 
 	if (!task) {
 		/*
@@ -6379,6 +6381,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_lock(&ctx->mutex);
 
 	if (move_group) {
+		synchronize_rcu();
 		perf_install_in_context(ctx, group_leader, event->cpu);
 		get_ctx(ctx);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
@@ -6484,6 +6487,39 @@ err:
 }
 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
 
+void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
+{
+	struct perf_event_context *src_ctx;
+	struct perf_event_context *dst_ctx;
+	struct perf_event *event, *tmp;
+	LIST_HEAD(events);
+
+	src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
+	dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
+
+	mutex_lock(&src_ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
+				 event_entry) {
+		perf_remove_from_context(event);
+		put_ctx(src_ctx);
+		list_add(&event->event_entry, &events);
+	}
+	mutex_unlock(&src_ctx->mutex);
+
+	synchronize_rcu();
+
+	mutex_lock(&dst_ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &events, event_entry) {
+		list_del(&event->event_entry);
+		if (event->state >= PERF_EVENT_STATE_OFF)
+			event->state = PERF_EVENT_STATE_INACTIVE;
+		perf_install_in_context(dst_ctx, event, dst_cpu);
+		get_ctx(dst_ctx);
+	}
+	mutex_unlock(&dst_ctx->mutex);
+}
+EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
+
 static void sync_child_event(struct perf_event *child_event,
 			       struct task_struct *child)
 {
-- 
cgit v1.2.3


From 087bfbb032691262f2f7d52b910652450c5554b8 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Fri, 15 Jun 2012 14:31:34 +0800
Subject: perf/x86: Add generic Intel uncore PMU support

This patch adds the generic Intel uncore PMU support, including helper
functions that add/delete uncore events, a hrtimer that periodically
polls the counters to avoid overflow and code that places all events
for a particular socket onto a single cpu.

The code design is based on the structure of Sandy Bridge-EP's uncore
subsystem, which consists of a variety of components, each component
contains one or more "boxes".

(Tooling support follows in the next patches.)

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1339741902-8449-6-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/Makefile                  |   4 +-
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 878 ++++++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event_intel_uncore.h | 204 ++++++
 3 files changed, 1085 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/perf_event_intel_uncore.c
 create mode 100644 arch/x86/kernel/cpu/perf_event_intel_uncore.h

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6ab6aa2fdfdd..bac4c3804cc7 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,9 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o
 endif
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
new file mode 100644
index 000000000000..fe76a07dfdbc
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -0,0 +1,878 @@
+#include "perf_event_intel_uncore.h"
+
+static struct intel_uncore_type *empty_uncore[] = { NULL, };
+static struct intel_uncore_type **msr_uncores = empty_uncore;
+
+/* mask of cpus that collect uncore events */
+static cpumask_t uncore_cpu_mask;
+
+/* constraint for the fixed counter */
+static struct event_constraint constraint_fixed =
+	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
+
+static void uncore_assign_hw_event(struct intel_uncore_box *box,
+				struct perf_event *event, int idx)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	hwc->idx = idx;
+	hwc->last_tag = ++box->tags[idx];
+
+	if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
+		hwc->event_base = uncore_msr_fixed_ctr(box);
+		hwc->config_base = uncore_msr_fixed_ctl(box);
+		return;
+	}
+
+	hwc->config_base = uncore_msr_event_ctl(box, hwc->idx);
+	hwc->event_base =  uncore_msr_perf_ctr(box, hwc->idx);
+}
+
+static void uncore_perf_event_update(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	u64 prev_count, new_count, delta;
+	int shift;
+
+	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+		shift = 64 - uncore_fixed_ctr_bits(box);
+	else
+		shift = 64 - uncore_perf_ctr_bits(box);
+
+	/* the hrtimer might modify the previous event value */
+again:
+	prev_count = local64_read(&event->hw.prev_count);
+	new_count = uncore_read_counter(box, event);
+	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
+		goto again;
+
+	delta = (new_count << shift) - (prev_count << shift);
+	delta >>= shift;
+
+	local64_add(delta, &event->count);
+}
+
+/*
+ * The overflow interrupt is unavailable for SandyBridge-EP, is broken
+ * for SandyBridge. So we use hrtimer to periodically poll the counter
+ * to avoid overflow.
+ */
+static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
+{
+	struct intel_uncore_box *box;
+	unsigned long flags;
+	int bit;
+
+	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
+	if (!box->n_active || box->cpu != smp_processor_id())
+		return HRTIMER_NORESTART;
+	/*
+	 * disable local interrupt to prevent uncore_pmu_event_start/stop
+	 * to interrupt the update process
+	 */
+	local_irq_save(flags);
+
+	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
+		uncore_perf_event_update(box, box->events[bit]);
+
+	local_irq_restore(flags);
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
+	return HRTIMER_RESTART;
+}
+
+static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
+{
+	__hrtimer_start_range_ns(&box->hrtimer,
+			ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
+			HRTIMER_MODE_REL_PINNED, 0);
+}
+
+static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
+{
+	hrtimer_cancel(&box->hrtimer);
+}
+
+static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
+{
+	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	box->hrtimer.function = uncore_pmu_hrtimer;
+}
+
+struct intel_uncore_box *uncore_alloc_box(int cpu)
+{
+	struct intel_uncore_box *box;
+
+	box = kmalloc_node(sizeof(*box), GFP_KERNEL | __GFP_ZERO,
+			   cpu_to_node(cpu));
+	if (!box)
+		return NULL;
+
+	uncore_pmu_init_hrtimer(box);
+	atomic_set(&box->refcnt, 1);
+	box->cpu = -1;
+	box->phys_id = -1;
+
+	return box;
+}
+
+static struct intel_uncore_box *
+uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
+{
+	return *per_cpu_ptr(pmu->box, cpu);
+}
+
+static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+{
+	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
+}
+
+static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+{
+	/*
+	 * perf core schedules event on the basis of cpu, uncore events are
+	 * collected by one of the cpus inside a physical package.
+	 */
+	return uncore_pmu_to_box(uncore_event_to_pmu(event),
+				 smp_processor_id());
+}
+
+static int uncore_collect_events(struct intel_uncore_box *box,
+				struct perf_event *leader, bool dogrp)
+{
+	struct perf_event *event;
+	int n, max_count;
+
+	max_count = box->pmu->type->num_counters;
+	if (box->pmu->type->fixed_ctl)
+		max_count++;
+
+	if (box->n_events >= max_count)
+		return -EINVAL;
+
+	n = box->n_events;
+	box->event_list[n] = leader;
+	n++;
+	if (!dogrp)
+		return n;
+
+	list_for_each_entry(event, &leader->sibling_list, group_entry) {
+		if (event->state <= PERF_EVENT_STATE_OFF)
+			continue;
+
+		if (n >= max_count)
+			return -EINVAL;
+
+		box->event_list[n] = event;
+		n++;
+	}
+	return n;
+}
+
+static struct event_constraint *
+uncore_event_constraint(struct intel_uncore_type *type,
+			struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	if (event->hw.config == ~0ULL)
+		return &constraint_fixed;
+
+	if (type->constraints) {
+		for_each_event_constraint(c, type->constraints) {
+			if ((event->hw.config & c->cmask) == c->code)
+				return c;
+		}
+	}
+
+	return &type->unconstrainted;
+}
+
+static int uncore_assign_events(struct intel_uncore_box *box,
+				int assign[], int n)
+{
+	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+	struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
+	int i, ret, wmin, wmax;
+	struct hw_perf_event *hwc;
+
+	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
+
+	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+		c = uncore_event_constraint(box->pmu->type,
+				box->event_list[i]);
+		constraints[i] = c;
+		wmin = min(wmin, c->weight);
+		wmax = max(wmax, c->weight);
+	}
+
+	/* fastpath, try to reuse previous register */
+	for (i = 0; i < n; i++) {
+		hwc = &box->event_list[i]->hw;
+		c = constraints[i];
+
+		/* never assigned */
+		if (hwc->idx == -1)
+			break;
+
+		/* constraint still honored */
+		if (!test_bit(hwc->idx, c->idxmsk))
+			break;
+
+		/* not already used */
+		if (test_bit(hwc->idx, used_mask))
+			break;
+
+		__set_bit(hwc->idx, used_mask);
+		assign[i] = hwc->idx;
+	}
+	if (i == n)
+		return 0;
+
+	/* slow path */
+	ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+	return ret ? -EINVAL : 0;
+}
+
+static void uncore_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct intel_uncore_box *box = uncore_event_to_box(event);
+	int idx = event->hw.idx;
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
+		return;
+
+	event->hw.state = 0;
+	box->events[idx] = event;
+	box->n_active++;
+	__set_bit(idx, box->active_mask);
+
+	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
+	uncore_enable_event(box, event);
+
+	if (box->n_active == 1) {
+		uncore_enable_box(box);
+		uncore_pmu_start_hrtimer(box);
+	}
+}
+
+static void uncore_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct intel_uncore_box *box = uncore_event_to_box(event);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
+		uncore_disable_event(box, event);
+		box->n_active--;
+		box->events[hwc->idx] = NULL;
+		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+		hwc->state |= PERF_HES_STOPPED;
+
+		if (box->n_active == 0) {
+			uncore_disable_box(box);
+			uncore_pmu_cancel_hrtimer(box);
+		}
+	}
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		/*
+		 * Drain the remaining delta count out of a event
+		 * that we are disabling:
+		 */
+		uncore_perf_event_update(box, event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+static int uncore_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct intel_uncore_box *box = uncore_event_to_box(event);
+	struct hw_perf_event *hwc = &event->hw;
+	int assign[UNCORE_PMC_IDX_MAX];
+	int i, n, ret;
+
+	if (!box)
+		return -ENODEV;
+
+	ret = n = uncore_collect_events(box, event, false);
+	if (ret < 0)
+		return ret;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (!(flags & PERF_EF_START))
+		hwc->state |= PERF_HES_ARCH;
+
+	ret = uncore_assign_events(box, assign, n);
+	if (ret)
+		return ret;
+
+	/* save events moving to new counters */
+	for (i = 0; i < box->n_events; i++) {
+		event = box->event_list[i];
+		hwc = &event->hw;
+
+		if (hwc->idx == assign[i] &&
+			hwc->last_tag == box->tags[assign[i]])
+			continue;
+		/*
+		 * Ensure we don't accidentally enable a stopped
+		 * counter simply because we rescheduled.
+		 */
+		if (hwc->state & PERF_HES_STOPPED)
+			hwc->state |= PERF_HES_ARCH;
+
+		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+	}
+
+	/* reprogram moved events into new counters */
+	for (i = 0; i < n; i++) {
+		event = box->event_list[i];
+		hwc = &event->hw;
+
+		if (hwc->idx != assign[i] ||
+			hwc->last_tag != box->tags[assign[i]])
+			uncore_assign_hw_event(box, event, assign[i]);
+		else if (i < box->n_events)
+			continue;
+
+		if (hwc->state & PERF_HES_ARCH)
+			continue;
+
+		uncore_pmu_event_start(event, 0);
+	}
+	box->n_events = n;
+
+	return 0;
+}
+
+static void uncore_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct intel_uncore_box *box = uncore_event_to_box(event);
+	int i;
+
+	uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+
+	for (i = 0; i < box->n_events; i++) {
+		if (event == box->event_list[i]) {
+			while (++i < box->n_events)
+				box->event_list[i - 1] = box->event_list[i];
+
+			--box->n_events;
+			break;
+		}
+	}
+
+	event->hw.idx = -1;
+	event->hw.last_tag = ~0ULL;
+}
+
+static void uncore_pmu_event_read(struct perf_event *event)
+{
+	struct intel_uncore_box *box = uncore_event_to_box(event);
+	uncore_perf_event_update(box, event);
+}
+
+/*
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
+static int uncore_validate_group(struct intel_uncore_pmu *pmu,
+				struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
+	struct intel_uncore_box *fake_box;
+	int assign[UNCORE_PMC_IDX_MAX];
+	int ret = -EINVAL, n;
+
+	fake_box = uncore_alloc_box(smp_processor_id());
+	if (!fake_box)
+		return -ENOMEM;
+
+	fake_box->pmu = pmu;
+	/*
+	 * the event is not yet connected with its
+	 * siblings therefore we must first collect
+	 * existing siblings, then add the new event
+	 * before we can simulate the scheduling
+	 */
+	n = uncore_collect_events(fake_box, leader, true);
+	if (n < 0)
+		goto out;
+
+	fake_box->n_events = n;
+	n = uncore_collect_events(fake_box, event, false);
+	if (n < 0)
+		goto out;
+
+	fake_box->n_events = n;
+
+	ret = uncore_assign_events(fake_box, assign, n);
+out:
+	kfree(fake_box);
+	return ret;
+}
+
+int uncore_pmu_event_init(struct perf_event *event)
+{
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+	struct hw_perf_event *hwc = &event->hw;
+	int ret;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	pmu = uncore_event_to_pmu(event);
+	/* no device found for this pmu */
+	if (pmu->func_id < 0)
+		return -ENOENT;
+
+	/*
+	 * Uncore PMU does measure at all privilege level all the time.
+	 * So it doesn't make sense to specify any exclude bits.
+	 */
+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
+			event->attr.exclude_hv || event->attr.exclude_idle)
+		return -EINVAL;
+
+	/* Sampling not supported yet */
+	if (hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * Place all uncore events for a particular physical package
+	 * onto a single cpu
+	 */
+	if (event->cpu < 0)
+		return -EINVAL;
+	box = uncore_pmu_to_box(pmu, event->cpu);
+	if (!box || box->cpu < 0)
+		return -EINVAL;
+	event->cpu = box->cpu;
+
+	if (event->attr.config == UNCORE_FIXED_EVENT) {
+		/* no fixed counter */
+		if (!pmu->type->fixed_ctl)
+			return -EINVAL;
+		/*
+		 * if there is only one fixed counter, only the first pmu
+		 * can access the fixed counter
+		 */
+		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
+			return -EINVAL;
+		hwc->config = ~0ULL;
+	} else {
+		hwc->config = event->attr.config & pmu->type->event_mask;
+	}
+
+	event->hw.idx = -1;
+	event->hw.last_tag = ~0ULL;
+
+	if (event->group_leader != event)
+		ret = uncore_validate_group(pmu, event);
+	else
+		ret = 0;
+
+	return ret;
+}
+
+static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
+{
+	int ret;
+
+	pmu->pmu = (struct pmu) {
+		.attr_groups	= pmu->type->attr_groups,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= uncore_pmu_event_init,
+		.add		= uncore_pmu_event_add,
+		.del		= uncore_pmu_event_del,
+		.start		= uncore_pmu_event_start,
+		.stop		= uncore_pmu_event_stop,
+		.read		= uncore_pmu_event_read,
+	};
+
+	if (pmu->type->num_boxes == 1) {
+		if (strlen(pmu->type->name) > 0)
+			sprintf(pmu->name, "uncore_%s", pmu->type->name);
+		else
+			sprintf(pmu->name, "uncore");
+	} else {
+		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
+			pmu->pmu_idx);
+	}
+
+	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+	return ret;
+}
+
+static void __init uncore_type_exit(struct intel_uncore_type *type)
+{
+	int i;
+
+	for (i = 0; i < type->num_boxes; i++)
+		free_percpu(type->pmus[i].box);
+	kfree(type->pmus);
+	type->pmus = NULL;
+	kfree(type->attr_groups[1]);
+	type->attr_groups[1] = NULL;
+}
+
+static int __init uncore_type_init(struct intel_uncore_type *type)
+{
+	struct intel_uncore_pmu *pmus;
+	struct attribute_group *events_group;
+	struct attribute **attrs;
+	int i, j;
+
+	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
+	if (!pmus)
+		return -ENOMEM;
+
+	type->unconstrainted = (struct event_constraint)
+		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
+				0, type->num_counters, 0);
+
+	for (i = 0; i < type->num_boxes; i++) {
+		pmus[i].func_id = -1;
+		pmus[i].pmu_idx = i;
+		pmus[i].type = type;
+		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
+		if (!pmus[i].box)
+			goto fail;
+	}
+
+	if (type->event_descs) {
+		i = 0;
+		while (type->event_descs[i].attr.attr.name)
+			i++;
+
+		events_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
+					sizeof(*events_group), GFP_KERNEL);
+		if (!events_group)
+			goto fail;
+
+		attrs = (struct attribute **)(events_group + 1);
+		events_group->name = "events";
+		events_group->attrs = attrs;
+
+		for (j = 0; j < i; j++)
+			attrs[j] = &type->event_descs[j].attr.attr;
+
+		type->attr_groups[1] = events_group;
+	}
+
+	type->pmus = pmus;
+	return 0;
+fail:
+	uncore_type_exit(type);
+	return -ENOMEM;
+}
+
+static int __init uncore_types_init(struct intel_uncore_type **types)
+{
+	int i, ret;
+
+	for (i = 0; types[i]; i++) {
+		ret = uncore_type_init(types[i]);
+		if (ret)
+			goto fail;
+	}
+	return 0;
+fail:
+	while (--i >= 0)
+		uncore_type_exit(types[i]);
+	return ret;
+}
+
+static void __cpuinit uncore_cpu_dying(int cpu)
+{
+	struct intel_uncore_type *type;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+	int i, j;
+
+	for (i = 0; msr_uncores[i]; i++) {
+		type = msr_uncores[i];
+		for (j = 0; j < type->num_boxes; j++) {
+			pmu = &type->pmus[j];
+			box = *per_cpu_ptr(pmu->box, cpu);
+			*per_cpu_ptr(pmu->box, cpu) = NULL;
+			if (box && atomic_dec_and_test(&box->refcnt))
+				kfree(box);
+		}
+	}
+}
+
+static int __cpuinit uncore_cpu_starting(int cpu)
+{
+	struct intel_uncore_type *type;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box, *exist;
+	int i, j, k, phys_id;
+
+	phys_id = topology_physical_package_id(cpu);
+
+	for (i = 0; msr_uncores[i]; i++) {
+		type = msr_uncores[i];
+		for (j = 0; j < type->num_boxes; j++) {
+			pmu = &type->pmus[j];
+			box = *per_cpu_ptr(pmu->box, cpu);
+			/* called by uncore_cpu_init? */
+			if (box && box->phys_id >= 0) {
+				uncore_box_init(box);
+				continue;
+			}
+
+			for_each_online_cpu(k) {
+				exist = *per_cpu_ptr(pmu->box, k);
+				if (exist && exist->phys_id == phys_id) {
+					atomic_inc(&exist->refcnt);
+					*per_cpu_ptr(pmu->box, cpu) = exist;
+					kfree(box);
+					box = NULL;
+					break;
+				}
+			}
+
+			if (box) {
+				box->phys_id = phys_id;
+				uncore_box_init(box);
+			}
+		}
+	}
+	return 0;
+}
+
+static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
+{
+	struct intel_uncore_type *type;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+	int i, j;
+
+	for (i = 0; msr_uncores[i]; i++) {
+		type = msr_uncores[i];
+		for (j = 0; j < type->num_boxes; j++) {
+			pmu = &type->pmus[j];
+			if (pmu->func_id < 0)
+				pmu->func_id = j;
+
+			box = uncore_alloc_box(cpu);
+			if (!box)
+				return -ENOMEM;
+
+			box->pmu = pmu;
+			box->phys_id = phys_id;
+			*per_cpu_ptr(pmu->box, cpu) = box;
+		}
+	}
+	return 0;
+}
+
+static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores,
+					    int old_cpu, int new_cpu)
+{
+	struct intel_uncore_type *type;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+	int i, j;
+
+	for (i = 0; uncores[i]; i++) {
+		type = uncores[i];
+		for (j = 0; j < type->num_boxes; j++) {
+			pmu = &type->pmus[j];
+			if (old_cpu < 0)
+				box = uncore_pmu_to_box(pmu, new_cpu);
+			else
+				box = uncore_pmu_to_box(pmu, old_cpu);
+			if (!box)
+				continue;
+
+			if (old_cpu < 0) {
+				WARN_ON_ONCE(box->cpu != -1);
+				box->cpu = new_cpu;
+				continue;
+			}
+
+			WARN_ON_ONCE(box->cpu != old_cpu);
+			if (new_cpu >= 0) {
+				uncore_pmu_cancel_hrtimer(box);
+				perf_pmu_migrate_context(&pmu->pmu,
+						old_cpu, new_cpu);
+				box->cpu = new_cpu;
+			} else {
+				box->cpu = -1;
+			}
+		}
+	}
+}
+
+static void __cpuinit uncore_event_exit_cpu(int cpu)
+{
+	int i, phys_id, target;
+
+	/* if exiting cpu is used for collecting uncore events */
+	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
+		return;
+
+	/* find a new cpu to collect uncore events */
+	phys_id = topology_physical_package_id(cpu);
+	target = -1;
+	for_each_online_cpu(i) {
+		if (i == cpu)
+			continue;
+		if (phys_id == topology_physical_package_id(i)) {
+			target = i;
+			break;
+		}
+	}
+
+	/* migrate uncore events to the new cpu */
+	if (target >= 0)
+		cpumask_set_cpu(target, &uncore_cpu_mask);
+
+	uncore_change_context(msr_uncores, cpu, target);
+}
+
+static void __cpuinit uncore_event_init_cpu(int cpu)
+{
+	int i, phys_id;
+
+	phys_id = topology_physical_package_id(cpu);
+	for_each_cpu(i, &uncore_cpu_mask) {
+		if (phys_id == topology_physical_package_id(i))
+			return;
+	}
+
+	cpumask_set_cpu(cpu, &uncore_cpu_mask);
+
+	uncore_change_context(msr_uncores, -1, cpu);
+}
+
+static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
+					 unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	/* allocate/free data structure for uncore box */
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		uncore_cpu_prepare(cpu, -1);
+		break;
+	case CPU_STARTING:
+		uncore_cpu_starting(cpu);
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_DYING:
+		uncore_cpu_dying(cpu);
+		break;
+	default:
+		break;
+	}
+
+	/* select the cpu that collects uncore events */
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_FAILED:
+	case CPU_STARTING:
+		uncore_event_init_cpu(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		uncore_event_exit_cpu(cpu);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block uncore_cpu_nb __cpuinitdata = {
+	.notifier_call = uncore_cpu_notifier,
+	/*
+	 * to migrate uncore events, our notifier should be executed
+	 * before perf core's notifier.
+	 */
+	.priority = CPU_PRI_PERF + 1,
+};
+
+static void __init uncore_cpu_setup(void *dummy)
+{
+	uncore_cpu_starting(smp_processor_id());
+}
+
+static int __init uncore_cpu_init(void)
+{
+	int ret, cpu;
+
+	switch (boot_cpu_data.x86_model) {
+	default:
+		return 0;
+	}
+
+	ret = uncore_types_init(msr_uncores);
+	if (ret)
+		return ret;
+
+	get_online_cpus();
+
+	for_each_online_cpu(cpu) {
+		int i, phys_id = topology_physical_package_id(cpu);
+
+		for_each_cpu(i, &uncore_cpu_mask) {
+			if (phys_id == topology_physical_package_id(i)) {
+				phys_id = -1;
+				break;
+			}
+		}
+		if (phys_id < 0)
+			continue;
+
+		uncore_cpu_prepare(cpu, phys_id);
+		uncore_event_init_cpu(cpu);
+	}
+	on_each_cpu(uncore_cpu_setup, NULL, 1);
+
+	register_cpu_notifier(&uncore_cpu_nb);
+
+	put_online_cpus();
+
+	return 0;
+}
+
+static int __init uncore_pmus_register(void)
+{
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_type *type;
+	int i, j;
+
+	for (i = 0; msr_uncores[i]; i++) {
+		type = msr_uncores[i];
+		for (j = 0; j < type->num_boxes; j++) {
+			pmu = &type->pmus[j];
+			uncore_pmu_register(pmu);
+		}
+	}
+
+	return 0;
+}
+
+static int __init intel_uncore_init(void)
+{
+	int ret;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return -ENODEV;
+
+	ret = uncore_cpu_init();
+	if (ret)
+		goto fail;
+
+	uncore_pmus_register();
+	return 0;
+fail:
+	return ret;
+}
+device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
new file mode 100644
index 000000000000..49a6bfbba0de
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -0,0 +1,204 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include "perf_event.h"
+
+#define UNCORE_PMU_NAME_LEN		32
+#define UNCORE_BOX_HASH_SIZE		8
+
+#define UNCORE_PMU_HRTIMER_INTERVAL	(60 * NSEC_PER_SEC)
+
+#define UNCORE_FIXED_EVENT		0xffff
+#define UNCORE_PMC_IDX_MAX_GENERIC	8
+#define UNCORE_PMC_IDX_FIXED		UNCORE_PMC_IDX_MAX_GENERIC
+#define UNCORE_PMC_IDX_MAX		(UNCORE_PMC_IDX_FIXED + 1)
+
+#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+
+struct intel_uncore_ops;
+struct intel_uncore_pmu;
+struct intel_uncore_box;
+struct uncore_event_desc;
+
+struct intel_uncore_type {
+	const char *name;
+	int num_counters;
+	int num_boxes;
+	int perf_ctr_bits;
+	int fixed_ctr_bits;
+	int single_fixed;
+	unsigned perf_ctr;
+	unsigned event_ctl;
+	unsigned event_mask;
+	unsigned fixed_ctr;
+	unsigned fixed_ctl;
+	unsigned box_ctl;
+	unsigned msr_offset;
+	struct event_constraint unconstrainted;
+	struct event_constraint *constraints;
+	struct intel_uncore_pmu *pmus;
+	struct intel_uncore_ops *ops;
+	struct uncore_event_desc *event_descs;
+	const struct attribute_group *attr_groups[3];
+};
+
+#define format_group attr_groups[0]
+
+struct intel_uncore_ops {
+	void (*init_box)(struct intel_uncore_box *);
+	void (*disable_box)(struct intel_uncore_box *);
+	void (*enable_box)(struct intel_uncore_box *);
+	void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
+	void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
+	u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
+};
+
+struct intel_uncore_pmu {
+	struct pmu pmu;
+	char name[UNCORE_PMU_NAME_LEN];
+	int pmu_idx;
+	int func_id;
+	struct intel_uncore_type *type;
+	struct intel_uncore_box ** __percpu box;
+};
+
+struct intel_uncore_box {
+	int phys_id;
+	int n_active;	/* number of active events */
+	int n_events;
+	int cpu;	/* cpu to collect events */
+	unsigned long flags;
+	atomic_t refcnt;
+	struct perf_event *events[UNCORE_PMC_IDX_MAX];
+	struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+	unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+	u64 tags[UNCORE_PMC_IDX_MAX];
+	struct intel_uncore_pmu *pmu;
+	struct hrtimer hrtimer;
+	struct list_head list;
+};
+
+#define UNCORE_BOX_FLAG_INITIATED	0
+
+struct uncore_event_desc {
+	struct kobj_attribute attr;
+	const char *config;
+};
+
+#define INTEL_UNCORE_EVENT_DESC(_name, _config)			\
+{								\
+	.attr	= __ATTR(_name, 0444, uncore_event_show, NULL),	\
+	.config	= _config,					\
+}
+
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
+static ssize_t __uncore_##_var##_show(struct kobject *kobj,		\
+				struct kobj_attribute *attr,		\
+				char *page)				\
+{									\
+	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
+	return sprintf(page, _format "\n");				\
+}									\
+static struct kobj_attribute format_attr_##_var =			\
+	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+
+
+static ssize_t uncore_event_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct uncore_event_desc *event =
+		container_of(attr, struct uncore_event_desc, attr);
+	return sprintf(buf, "%s", event->config);
+}
+
+static inline
+unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
+{
+	if (!box->pmu->type->box_ctl)
+		return 0;
+	return box->pmu->type->box_ctl +
+		box->pmu->type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline
+unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
+{
+	if (!box->pmu->type->fixed_ctl)
+		return 0;
+	return box->pmu->type->fixed_ctl +
+		box->pmu->type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline
+unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
+{
+	return box->pmu->type->fixed_ctr +
+		box->pmu->type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline
+unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
+{
+	return idx + box->pmu->type->event_ctl +
+		box->pmu->type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline
+unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+	return idx + box->pmu->type->perf_ctr +
+		box->pmu->type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
+{
+	return box->pmu->type->perf_ctr_bits;
+}
+
+static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
+{
+	return box->pmu->type->fixed_ctr_bits;
+}
+
+static inline int uncore_num_counters(struct intel_uncore_box *box)
+{
+	return box->pmu->type->num_counters;
+}
+
+static inline void uncore_disable_box(struct intel_uncore_box *box)
+{
+	if (box->pmu->type->ops->disable_box)
+		box->pmu->type->ops->disable_box(box);
+}
+
+static inline void uncore_enable_box(struct intel_uncore_box *box)
+{
+	if (box->pmu->type->ops->enable_box)
+		box->pmu->type->ops->enable_box(box);
+}
+
+static inline void uncore_disable_event(struct intel_uncore_box *box,
+				struct perf_event *event)
+{
+	box->pmu->type->ops->disable_event(box, event);
+}
+
+static inline void uncore_enable_event(struct intel_uncore_box *box,
+				struct perf_event *event)
+{
+	box->pmu->type->ops->enable_event(box, event);
+}
+
+static inline u64 uncore_read_counter(struct intel_uncore_box *box,
+				struct perf_event *event)
+{
+	return box->pmu->type->ops->read_counter(box, event);
+}
+
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+	if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+		if (box->pmu->type->ops->init_box)
+			box->pmu->type->ops->init_box(box);
+	}
+}
-- 
cgit v1.2.3


From fcde10e916326545e8fec1807357c68ef08dc443 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Fri, 15 Jun 2012 14:31:35 +0800
Subject: perf/x86: Add Intel Nehalem and Sandy Bridge uncore PMU support

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1339741902-8449-7-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 195 ++++++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |  50 +++++++
 2 files changed, 245 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index fe76a07dfdbc..3ed941ac3745 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -10,6 +10,192 @@ static cpumask_t uncore_cpu_mask;
 static struct event_constraint constraint_fixed =
 	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
 
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
+
+/* Sandy Bridge uncore support */
+static void snb_uncore_msr_enable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+		wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+	else
+		wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
+}
+
+static void snb_uncore_msr_disable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	wrmsrl(event->hw.config_base, 0);
+}
+
+static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	u64 count;
+	rdmsrl(event->hw.event_base, count);
+	return count;
+}
+
+static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+	if (box->pmu->pmu_idx == 0) {
+		wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+			SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+	}
+}
+
+static struct attribute *snb_uncore_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_cmask5.attr,
+	NULL,
+};
+
+static struct attribute_group snb_uncore_format_group = {
+	.name = "format",
+	.attrs = snb_uncore_formats_attr,
+};
+
+static struct intel_uncore_ops snb_uncore_msr_ops = {
+	.init_box	= snb_uncore_msr_init_box,
+	.disable_event	= snb_uncore_msr_disable_event,
+	.enable_event	= snb_uncore_msr_enable_event,
+	.read_counter	= snb_uncore_msr_read_counter,
+};
+
+static struct event_constraint snb_uncore_cbox_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
+	EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snb_uncore_cbox = {
+	.name		= "cbox",
+	.num_counters   = 2,
+	.num_boxes	= 4,
+	.perf_ctr_bits	= 44,
+	.fixed_ctr_bits	= 48,
+	.perf_ctr	= SNB_UNC_CBO_0_PER_CTR0,
+	.event_ctl	= SNB_UNC_CBO_0_PERFEVTSEL0,
+	.fixed_ctr	= SNB_UNC_FIXED_CTR,
+	.fixed_ctl	= SNB_UNC_FIXED_CTR_CTRL,
+	.single_fixed	= 1,
+	.event_mask	= SNB_UNC_RAW_EVENT_MASK,
+	.msr_offset	= SNB_UNC_CBO_MSR_OFFSET,
+	.constraints	= snb_uncore_cbox_constraints,
+	.ops		= &snb_uncore_msr_ops,
+	.format_group	= &snb_uncore_format_group,
+};
+
+static struct intel_uncore_type *snb_msr_uncores[] = {
+	&snb_uncore_cbox,
+	NULL,
+};
+/* end of Sandy Bridge uncore support */
+
+/* Nehalem uncore support */
+static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL,
+		NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
+}
+
+static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+		wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+	else
+		wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
+}
+
+static struct attribute *nhm_uncore_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_cmask8.attr,
+	NULL,
+};
+
+static struct attribute_group nhm_uncore_format_group = {
+	.name = "format",
+	.attrs = nhm_uncore_formats_attr,
+};
+
+static struct uncore_event_desc nhm_uncore_events[] = {
+	INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "config=0xffff"),
+	/* full cache line writes to DRAM */
+	INTEL_UNCORE_EVENT_DESC(QMC_WRITES_FULL_ANY, "event=0x2f,umask=0xf"),
+	/* Quickpath Memory Controller normal priority read requests */
+	INTEL_UNCORE_EVENT_DESC(QMC_NORMAL_READS_ANY, "event=0x2c,umask=0xf"),
+	/* Quickpath Home Logic read requests from the IOH */
+	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_READS,
+				"event=0x20,umask=0x1"),
+	/* Quickpath Home Logic write requests from the IOH */
+	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_WRITES,
+				"event=0x20,umask=0x2"),
+	/* Quickpath Home Logic read requests from a remote socket */
+	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_READS,
+				"event=0x20,umask=0x4"),
+	/* Quickpath Home Logic write requests from a remote socket */
+	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_WRITES,
+				"event=0x20,umask=0x8"),
+	/* Quickpath Home Logic read requests from the local socket */
+	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_READS,
+				"event=0x20,umask=0x10"),
+	/* Quickpath Home Logic write requests from the local socket */
+	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_WRITES,
+				"event=0x20,umask=0x20"),
+	{ /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhm_uncore_msr_ops = {
+	.disable_box	= nhm_uncore_msr_disable_box,
+	.enable_box	= nhm_uncore_msr_enable_box,
+	.disable_event	= snb_uncore_msr_disable_event,
+	.enable_event	= nhm_uncore_msr_enable_event,
+	.read_counter	= snb_uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type nhm_uncore = {
+	.name		= "",
+	.num_counters   = 8,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 48,
+	.fixed_ctr_bits	= 48,
+	.event_ctl	= NHM_UNC_PERFEVTSEL0,
+	.perf_ctr	= NHM_UNC_UNCORE_PMC0,
+	.fixed_ctr	= NHM_UNC_FIXED_CTR,
+	.fixed_ctl	= NHM_UNC_FIXED_CTR_CTRL,
+	.event_mask	= NHM_UNC_RAW_EVENT_MASK,
+	.event_descs	= nhm_uncore_events,
+	.ops		= &nhm_uncore_msr_ops,
+	.format_group	= &nhm_uncore_format_group,
+};
+
+static struct intel_uncore_type *nhm_msr_uncores[] = {
+	&nhm_uncore,
+	NULL,
+};
+/* end of Nehalem uncore support */
+
 static void uncore_assign_hw_event(struct intel_uncore_box *box,
 				struct perf_event *event, int idx)
 {
@@ -808,6 +994,15 @@ static int __init uncore_cpu_init(void)
 	int ret, cpu;
 
 	switch (boot_cpu_data.x86_model) {
+	case 26: /* Nehalem */
+	case 30:
+	case 37: /* Westmere */
+	case 44:
+		msr_uncores = nhm_msr_uncores;
+		break;
+	case 42: /* Sandy Bridge */
+		msr_uncores = snb_msr_uncores;
+		break;
 	default:
 		return 0;
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 49a6bfbba0de..eeb5ca5815a8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -15,6 +15,56 @@
 
 #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
 
+/* SNB event control */
+#define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
+#define SNB_UNC_CTL_UMASK_MASK			0x0000ff00
+#define SNB_UNC_CTL_EDGE_DET			(1 << 18)
+#define SNB_UNC_CTL_EN				(1 << 22)
+#define SNB_UNC_CTL_INVERT			(1 << 23)
+#define SNB_UNC_CTL_CMASK_MASK			0x1f000000
+#define NHM_UNC_CTL_CMASK_MASK			0xff000000
+#define NHM_UNC_FIXED_CTR_CTL_EN		(1 << 0)
+
+#define SNB_UNC_RAW_EVENT_MASK			(SNB_UNC_CTL_EV_SEL_MASK | \
+						 SNB_UNC_CTL_UMASK_MASK | \
+						 SNB_UNC_CTL_EDGE_DET | \
+						 SNB_UNC_CTL_INVERT | \
+						 SNB_UNC_CTL_CMASK_MASK)
+
+#define NHM_UNC_RAW_EVENT_MASK			(SNB_UNC_CTL_EV_SEL_MASK | \
+						 SNB_UNC_CTL_UMASK_MASK | \
+						 SNB_UNC_CTL_EDGE_DET | \
+						 SNB_UNC_CTL_INVERT | \
+						 NHM_UNC_CTL_CMASK_MASK)
+
+/* SNB global control register */
+#define SNB_UNC_PERF_GLOBAL_CTL                 0x391
+#define SNB_UNC_FIXED_CTR_CTRL                  0x394
+#define SNB_UNC_FIXED_CTR                       0x395
+
+/* SNB uncore global control */
+#define SNB_UNC_GLOBAL_CTL_CORE_ALL             ((1 << 4) - 1)
+#define SNB_UNC_GLOBAL_CTL_EN                   (1 << 29)
+
+/* SNB Cbo register */
+#define SNB_UNC_CBO_0_PERFEVTSEL0               0x700
+#define SNB_UNC_CBO_0_PER_CTR0                  0x706
+#define SNB_UNC_CBO_MSR_OFFSET                  0x10
+
+/* NHM global control register */
+#define NHM_UNC_PERF_GLOBAL_CTL                 0x391
+#define NHM_UNC_FIXED_CTR                       0x394
+#define NHM_UNC_FIXED_CTR_CTRL                  0x395
+
+/* NHM uncore global control */
+#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL            ((1ULL << 8) - 1)
+#define NHM_UNC_GLOBAL_CTL_EN_FC                (1ULL << 32)
+
+/* NHM uncore register */
+#define NHM_UNC_PERFEVTSEL0                     0x3c0
+#define NHM_UNC_UNCORE_PMC0                     0x3b0
+
+
 struct intel_uncore_ops;
 struct intel_uncore_pmu;
 struct intel_uncore_box;
-- 
cgit v1.2.3


From 14371cce03c2fc393997e17f979e76674b7f392a Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Fri, 15 Jun 2012 14:31:36 +0800
Subject: perf: Add generic PCI uncore PMU device support

This patch adds generic support for uncore PMUs presented as
PCI devices. (These come in addition to the CPU/MSR based
uncores.)

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1339741902-8449-8-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 175 +++++++++++++++++++++++++-
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |  66 ++++++++++
 2 files changed, 236 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 3ed941ac3745..e20c65a0e108 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2,6 +2,11 @@
 
 static struct intel_uncore_type *empty_uncore[] = { NULL, };
 static struct intel_uncore_type **msr_uncores = empty_uncore;
+static struct intel_uncore_type **pci_uncores = empty_uncore;
+/* pci bus to socket mapping */
+static int pcibus_to_physid[256] = { [0 ... 255] = -1, };
+
+static DEFINE_RAW_SPINLOCK(uncore_box_lock);
 
 /* mask of cpus that collect uncore events */
 static cpumask_t uncore_cpu_mask;
@@ -205,13 +210,13 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box,
 	hwc->last_tag = ++box->tags[idx];
 
 	if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
-		hwc->event_base = uncore_msr_fixed_ctr(box);
-		hwc->config_base = uncore_msr_fixed_ctl(box);
+		hwc->event_base = uncore_fixed_ctr(box);
+		hwc->config_base = uncore_fixed_ctl(box);
 		return;
 	}
 
-	hwc->config_base = uncore_msr_event_ctl(box, hwc->idx);
-	hwc->event_base =  uncore_msr_perf_ctr(box, hwc->idx);
+	hwc->config_base = uncore_event_ctl(box, hwc->idx);
+	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
 }
 
 static void uncore_perf_event_update(struct intel_uncore_box *box,
@@ -305,6 +310,22 @@ struct intel_uncore_box *uncore_alloc_box(int cpu)
 static struct intel_uncore_box *
 uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 {
+	static struct intel_uncore_box *box;
+
+	box = *per_cpu_ptr(pmu->box, cpu);
+	if (box)
+		return box;
+
+	raw_spin_lock(&uncore_box_lock);
+	list_for_each_entry(box, &pmu->box_list, list) {
+		if (box->phys_id == topology_physical_package_id(cpu)) {
+			atomic_inc(&box->refcnt);
+			*per_cpu_ptr(pmu->box, cpu) = box;
+			break;
+		}
+	}
+	raw_spin_unlock(&uncore_box_lock);
+
 	return *per_cpu_ptr(pmu->box, cpu);
 }
 
@@ -706,6 +727,13 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
 	type->attr_groups[1] = NULL;
 }
 
+static void uncore_types_exit(struct intel_uncore_type **types)
+{
+	int i;
+	for (i = 0; types[i]; i++)
+		uncore_type_exit(types[i]);
+}
+
 static int __init uncore_type_init(struct intel_uncore_type *type)
 {
 	struct intel_uncore_pmu *pmus;
@@ -725,6 +753,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
 		pmus[i].func_id = -1;
 		pmus[i].pmu_idx = i;
 		pmus[i].type = type;
+		INIT_LIST_HEAD(&pmus[i].box_list);
 		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
 		if (!pmus[i].box)
 			goto fail;
@@ -773,6 +802,127 @@ fail:
 	return ret;
 }
 
+static struct pci_driver *uncore_pci_driver;
+static bool pcidrv_registered;
+
+/*
+ * add a pci uncore device
+ */
+static int __devinit uncore_pci_add(struct intel_uncore_type *type,
+				    struct pci_dev *pdev)
+{
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+	int i, phys_id;
+
+	phys_id = pcibus_to_physid[pdev->bus->number];
+	if (phys_id < 0)
+		return -ENODEV;
+
+	box = uncore_alloc_box(0);
+	if (!box)
+		return -ENOMEM;
+
+	/*
+	 * for performance monitoring unit with multiple boxes,
+	 * each box has a different function id.
+	 */
+	for (i = 0; i < type->num_boxes; i++) {
+		pmu = &type->pmus[i];
+		if (pmu->func_id == pdev->devfn)
+			break;
+		if (pmu->func_id < 0) {
+			pmu->func_id = pdev->devfn;
+			break;
+		}
+		pmu = NULL;
+	}
+
+	if (!pmu) {
+		kfree(box);
+		return -EINVAL;
+	}
+
+	box->phys_id = phys_id;
+	box->pci_dev = pdev;
+	box->pmu = pmu;
+	uncore_box_init(box);
+	pci_set_drvdata(pdev, box);
+
+	raw_spin_lock(&uncore_box_lock);
+	list_add_tail(&box->list, &pmu->box_list);
+	raw_spin_unlock(&uncore_box_lock);
+
+	return 0;
+}
+
+static void __devexit uncore_pci_remove(struct pci_dev *pdev)
+{
+	struct intel_uncore_box *box = pci_get_drvdata(pdev);
+	struct intel_uncore_pmu *pmu = box->pmu;
+	int cpu, phys_id = pcibus_to_physid[pdev->bus->number];
+
+	if (WARN_ON_ONCE(phys_id != box->phys_id))
+		return;
+
+	raw_spin_lock(&uncore_box_lock);
+	list_del(&box->list);
+	raw_spin_unlock(&uncore_box_lock);
+
+	for_each_possible_cpu(cpu) {
+		if (*per_cpu_ptr(pmu->box, cpu) == box) {
+			*per_cpu_ptr(pmu->box, cpu) = NULL;
+			atomic_dec(&box->refcnt);
+		}
+	}
+
+	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
+	kfree(box);
+}
+
+static int __devinit uncore_pci_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id)
+{
+	struct intel_uncore_type *type;
+
+	type = (struct intel_uncore_type *)id->driver_data;
+	return uncore_pci_add(type, pdev);
+}
+
+static int __init uncore_pci_init(void)
+{
+	int ret;
+
+	switch (boot_cpu_data.x86_model) {
+	default:
+		return 0;
+	}
+
+	ret = uncore_types_init(pci_uncores);
+	if (ret)
+		return ret;
+
+	uncore_pci_driver->probe = uncore_pci_probe;
+	uncore_pci_driver->remove = uncore_pci_remove;
+
+	ret = pci_register_driver(uncore_pci_driver);
+	if (ret == 0)
+		pcidrv_registered = true;
+	else
+		uncore_types_exit(pci_uncores);
+
+	return ret;
+}
+
+static void __init uncore_pci_exit(void)
+{
+	if (pcidrv_registered) {
+		pcidrv_registered = false;
+		pci_unregister_driver(uncore_pci_driver);
+		uncore_types_exit(pci_uncores);
+	}
+}
+
 static void __cpuinit uncore_cpu_dying(int cpu)
 {
 	struct intel_uncore_type *type;
@@ -921,6 +1071,7 @@ static void __cpuinit uncore_event_exit_cpu(int cpu)
 		cpumask_set_cpu(target, &uncore_cpu_mask);
 
 	uncore_change_context(msr_uncores, cpu, target);
+	uncore_change_context(pci_uncores, cpu, target);
 }
 
 static void __cpuinit uncore_event_init_cpu(int cpu)
@@ -936,6 +1087,7 @@ static void __cpuinit uncore_event_init_cpu(int cpu)
 	cpumask_set_cpu(cpu, &uncore_cpu_mask);
 
 	uncore_change_context(msr_uncores, -1, cpu);
+	uncore_change_context(pci_uncores, -1, cpu);
 }
 
 static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
@@ -1051,6 +1203,14 @@ static int __init uncore_pmus_register(void)
 		}
 	}
 
+	for (i = 0; pci_uncores[i]; i++) {
+		type = pci_uncores[i];
+		for (j = 0; j < type->num_boxes; j++) {
+			pmu = &type->pmus[j];
+			uncore_pmu_register(pmu);
+		}
+	}
+
 	return 0;
 }
 
@@ -1061,9 +1221,14 @@ static int __init intel_uncore_init(void)
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 		return -ENODEV;
 
-	ret = uncore_cpu_init();
+	ret = uncore_pci_init();
 	if (ret)
 		goto fail;
+	ret = uncore_cpu_init();
+	if (ret) {
+		uncore_pci_exit();
+		goto fail;
+	}
 
 	uncore_pmus_register();
 	return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index eeb5ca5815a8..aa01df87b8de 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/pci.h>
 #include <linux/perf_event.h>
 #include "perf_event.h"
 
@@ -110,6 +111,7 @@ struct intel_uncore_pmu {
 	int func_id;
 	struct intel_uncore_type *type;
 	struct intel_uncore_box ** __percpu box;
+	struct list_head box_list;
 };
 
 struct intel_uncore_box {
@@ -123,6 +125,7 @@ struct intel_uncore_box {
 	struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
 	unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
 	u64 tags[UNCORE_PMC_IDX_MAX];
+	struct pci_dev *pci_dev;
 	struct intel_uncore_pmu *pmu;
 	struct hrtimer hrtimer;
 	struct list_head list;
@@ -161,6 +164,33 @@ static ssize_t uncore_event_show(struct kobject *kobj,
 	return sprintf(buf, "%s", event->config);
 }
 
+static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
+{
+	return box->pmu->type->box_ctl;
+}
+
+static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
+{
+	return box->pmu->type->fixed_ctl;
+}
+
+static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
+{
+	return box->pmu->type->fixed_ctr;
+}
+
+static inline
+unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
+{
+	return idx * 4 + box->pmu->type->event_ctl;
+}
+
+static inline
+unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+	return idx * 8 + box->pmu->type->perf_ctr;
+}
+
 static inline
 unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
 {
@@ -200,6 +230,42 @@ unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
 		box->pmu->type->msr_offset * box->pmu->pmu_idx;
 }
 
+static inline
+unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
+{
+	if (box->pci_dev)
+		return uncore_pci_fixed_ctl(box);
+	else
+		return uncore_msr_fixed_ctl(box);
+}
+
+static inline
+unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
+{
+	if (box->pci_dev)
+		return uncore_pci_fixed_ctr(box);
+	else
+		return uncore_msr_fixed_ctr(box);
+}
+
+static inline
+unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
+{
+	if (box->pci_dev)
+		return uncore_pci_event_ctl(box, idx);
+	else
+		return uncore_msr_event_ctl(box, idx);
+}
+
+static inline
+unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+	if (box->pci_dev)
+		return uncore_pci_perf_ctr(box, idx);
+	else
+		return uncore_msr_perf_ctr(box, idx);
+}
+
 static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
 {
 	return box->pmu->type->perf_ctr_bits;
-- 
cgit v1.2.3


From 7c94ee2e0917b2ea56498bff939c8aa55da27207 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Fri, 15 Jun 2012 14:31:37 +0800
Subject: perf/x86: Add Intel Nehalem and Sandy Bridge-EP uncore support

The uncore subsystem in Sandy Bridge-EP consists of 8 components:

 Ubox, Cacheing Agent, Home Agent, Memory controller, Power Control,
 QPI Link Layer, R2PCIe, R3QPI.

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1339741902-8449-9-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 484 ++++++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |  86 +++++
 include/linux/pci_ids.h                       |  11 +
 3 files changed, 581 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index e20c65a0e108..d34f68bf990b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -21,6 +21,482 @@ DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
 DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
 DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
 DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
+DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
+
+/* Sandy Bridge-EP uncore support */
+static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	int box_ctl = uncore_pci_box_ctl(box);
+	u32 config;
+
+	pci_read_config_dword(pdev, box_ctl, &config);
+	config |= SNBEP_PMON_BOX_CTL_FRZ;
+	pci_write_config_dword(pdev, box_ctl, config);
+}
+
+static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	int box_ctl = uncore_pci_box_ctl(box);
+	u32 config;
+
+	pci_read_config_dword(pdev, box_ctl, &config);
+	config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+	pci_write_config_dword(pdev, box_ctl, config);
+}
+
+static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	struct hw_perf_event *hwc = &event->hw;
+
+	pci_write_config_dword(pdev, hwc->config_base, hwc->config |
+				SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	struct hw_perf_event *hwc = &event->hw;
+
+	pci_write_config_dword(pdev, hwc->config_base, hwc->config);
+}
+
+static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	struct hw_perf_event *hwc = &event->hw;
+	u64 count;
+
+	pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
+	pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
+	return count;
+}
+
+static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL,
+				SNBEP_PMON_BOX_CTL_INT);
+}
+
+static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+	u64 config;
+	unsigned msr;
+
+	msr = uncore_msr_box_ctl(box);
+	if (msr) {
+		rdmsrl(msr, config);
+		config |= SNBEP_PMON_BOX_CTL_FRZ;
+		wrmsrl(msr, config);
+		return;
+	}
+}
+
+static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	u64 config;
+	unsigned msr;
+
+	msr = uncore_msr_box_ctl(box);
+	if (msr) {
+		rdmsrl(msr, config);
+		config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+		wrmsrl(msr, config);
+		return;
+	}
+}
+
+static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	wrmsrl(hwc->config_base, hwc->config);
+}
+
+static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 count;
+
+	rdmsrl(hwc->event_base, count);
+	return count;
+}
+
+static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+	unsigned msr = uncore_msr_box_ctl(box);
+	if (msr)
+		wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static struct attribute *snbep_uncore_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh8.attr,
+	NULL,
+};
+
+static struct attribute *snbep_uncore_ubox_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh5.attr,
+	NULL,
+};
+
+static struct attribute *snbep_uncore_pcu_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_occ_sel.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh5.attr,
+	&format_attr_occ_invert.attr,
+	&format_attr_occ_edge.attr,
+	NULL,
+};
+
+static struct uncore_event_desc snbep_uncore_imc_events[] = {
+	INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "config=0xffff"),
+	/* read */
+	INTEL_UNCORE_EVENT_DESC(CAS_COUNT_RD, "event=0x4,umask=0x3"),
+	/* write */
+	INTEL_UNCORE_EVENT_DESC(CAS_COUNT_WR, "event=0x4,umask=0xc"),
+	{ /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc snbep_uncore_qpi_events[] = {
+	INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "event=0x14"),
+	/* outgoing data+nondata flits */
+	INTEL_UNCORE_EVENT_DESC(TxL_FLITS_ACTIVE, "event=0x0,umask=0x6"),
+	/* DRS data received */
+	INTEL_UNCORE_EVENT_DESC(DRS_DATA, "event=0x2,umask=0x8"),
+	/* NCB data received */
+	INTEL_UNCORE_EVENT_DESC(NCB_DATA, "event=0x3,umask=0x4"),
+	{ /* end: all zeroes */ },
+};
+
+static struct attribute_group snbep_uncore_format_group = {
+	.name = "format",
+	.attrs = snbep_uncore_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_ubox_format_group = {
+	.name = "format",
+	.attrs = snbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_pcu_format_group = {
+	.name = "format",
+	.attrs = snbep_uncore_pcu_formats_attr,
+};
+
+static struct intel_uncore_ops snbep_uncore_msr_ops = {
+	.init_box	= snbep_uncore_msr_init_box,
+	.disable_box	= snbep_uncore_msr_disable_box,
+	.enable_box	= snbep_uncore_msr_enable_box,
+	.disable_event	= snbep_uncore_msr_disable_event,
+	.enable_event	= snbep_uncore_msr_enable_event,
+	.read_counter	= snbep_uncore_msr_read_counter,
+};
+
+static struct intel_uncore_ops snbep_uncore_pci_ops = {
+	.init_box	= snbep_uncore_pci_init_box,
+	.disable_box	= snbep_uncore_pci_disable_box,
+	.enable_box	= snbep_uncore_pci_enable_box,
+	.disable_event	= snbep_uncore_pci_disable_event,
+	.enable_event	= snbep_uncore_pci_enable_event,
+	.read_counter	= snbep_uncore_pci_read_counter,
+};
+
+static struct event_constraint snbep_uncore_cbox_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0x1f, 0xe),
+	UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+	EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snbep_uncore_ubox = {
+	.name		= "ubox",
+	.num_counters   = 2,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 44,
+	.fixed_ctr_bits	= 48,
+	.perf_ctr	= SNBEP_U_MSR_PMON_CTR0,
+	.event_ctl	= SNBEP_U_MSR_PMON_CTL0,
+	.event_mask	= SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+	.fixed_ctr	= SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+	.fixed_ctl	= SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+	.ops		= &snbep_uncore_msr_ops,
+	.format_group	= &snbep_uncore_ubox_format_group,
+};
+
+static struct intel_uncore_type snbep_uncore_cbox = {
+	.name		= "cbox",
+	.num_counters   = 4,
+	.num_boxes	= 8,
+	.perf_ctr_bits	= 44,
+	.event_ctl	= SNBEP_C0_MSR_PMON_CTL0,
+	.perf_ctr	= SNBEP_C0_MSR_PMON_CTR0,
+	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,
+	.box_ctl	= SNBEP_C0_MSR_PMON_BOX_CTL,
+	.msr_offset	= SNBEP_CBO_MSR_OFFSET,
+	.constraints	= snbep_uncore_cbox_constraints,
+	.ops		= &snbep_uncore_msr_ops,
+	.format_group	= &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type snbep_uncore_pcu = {
+	.name		= "pcu",
+	.num_counters   = 4,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 48,
+	.perf_ctr	= SNBEP_PCU_MSR_PMON_CTR0,
+	.event_ctl	= SNBEP_PCU_MSR_PMON_CTL0,
+	.event_mask	= SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+	.box_ctl	= SNBEP_PCU_MSR_PMON_BOX_CTL,
+	.ops		= &snbep_uncore_msr_ops,
+	.format_group	= &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *snbep_msr_uncores[] = {
+	&snbep_uncore_ubox,
+	&snbep_uncore_cbox,
+	&snbep_uncore_pcu,
+	NULL,
+};
+
+#define SNBEP_UNCORE_PCI_COMMON_INIT()				\
+	.perf_ctr	= SNBEP_PCI_PMON_CTR0,			\
+	.event_ctl	= SNBEP_PCI_PMON_CTL0,			\
+	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,		\
+	.box_ctl	= SNBEP_PCI_PMON_BOX_CTL,		\
+	.ops		= &snbep_uncore_pci_ops,		\
+	.format_group	= &snbep_uncore_format_group
+
+static struct intel_uncore_type snbep_uncore_ha = {
+	.name		= "ha",
+	.num_counters   = 4,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 48,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_imc = {
+	.name		= "imc",
+	.num_counters   = 4,
+	.num_boxes	= 4,
+	.perf_ctr_bits	= 48,
+	.fixed_ctr_bits	= 48,
+	.fixed_ctr	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+	.fixed_ctl	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+	.event_descs	= snbep_uncore_imc_events,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_qpi = {
+	.name		= "qpi",
+	.num_counters   = 4,
+	.num_boxes	= 2,
+	.perf_ctr_bits	= 48,
+	.event_descs	= snbep_uncore_qpi_events,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+
+static struct intel_uncore_type snbep_uncore_r2pcie = {
+	.name		= "r2pcie",
+	.num_counters   = 4,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 44,
+	.constraints	= snbep_uncore_r2pcie_constraints,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_r3qpi = {
+	.name		= "r3qpi",
+	.num_counters   = 3,
+	.num_boxes	= 2,
+	.perf_ctr_bits	= 44,
+	.constraints	= snbep_uncore_r3qpi_constraints,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type *snbep_pci_uncores[] = {
+	&snbep_uncore_ha,
+	&snbep_uncore_imc,
+	&snbep_uncore_qpi,
+	&snbep_uncore_r2pcie,
+	&snbep_uncore_r3qpi,
+	NULL,
+};
+
+static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = {
+	{ /* Home Agent */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
+		.driver_data = (unsigned long)&snbep_uncore_ha,
+	},
+	{ /* MC Channel 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
+		.driver_data = (unsigned long)&snbep_uncore_imc,
+	},
+	{ /* MC Channel 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
+		.driver_data = (unsigned long)&snbep_uncore_imc,
+	},
+	{ /* MC Channel 2 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
+		.driver_data = (unsigned long)&snbep_uncore_imc,
+	},
+	{ /* MC Channel 3 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
+		.driver_data = (unsigned long)&snbep_uncore_imc,
+	},
+	{ /* QPI Port 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
+		.driver_data = (unsigned long)&snbep_uncore_qpi,
+	},
+	{ /* QPI Port 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
+		.driver_data = (unsigned long)&snbep_uncore_qpi,
+	},
+	{ /* P2PCIe */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
+		.driver_data = (unsigned long)&snbep_uncore_r2pcie,
+	},
+	{ /* R3QPI Link 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
+		.driver_data = (unsigned long)&snbep_uncore_r3qpi,
+	},
+	{ /* R3QPI Link 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
+		.driver_data = (unsigned long)&snbep_uncore_r3qpi,
+	},
+	{ /* end: all zeroes */ }
+};
+
+static struct pci_driver snbep_uncore_pci_driver = {
+	.name		= "snbep_uncore",
+	.id_table	= snbep_uncore_pci_ids,
+};
+
+/*
+ * build pci bus to socket mapping
+ */
+static void snbep_pci2phy_map_init(void)
+{
+	struct pci_dev *ubox_dev = NULL;
+	int i, bus, nodeid;
+	u32 config;
+
+	while (1) {
+		/* find the UBOX device */
+		ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+					PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX,
+					ubox_dev);
+		if (!ubox_dev)
+			break;
+		bus = ubox_dev->bus->number;
+		/* get the Node ID of the local register */
+		pci_read_config_dword(ubox_dev, 0x40, &config);
+		nodeid = config;
+		/* get the Node ID mapping */
+		pci_read_config_dword(ubox_dev, 0x54, &config);
+		/*
+		 * every three bits in the Node ID mapping register maps
+		 * to a particular node.
+		 */
+		for (i = 0; i < 8; i++) {
+			if (nodeid == ((config >> (3 * i)) & 0x7)) {
+				pcibus_to_physid[bus] = i;
+				break;
+			}
+		}
+	};
+	return;
+}
+/* end of Sandy Bridge-EP uncore support */
+
 
 /* Sandy Bridge uncore support */
 static void snb_uncore_msr_enable_event(struct intel_uncore_box *box,
@@ -894,6 +1370,11 @@ static int __init uncore_pci_init(void)
 	int ret;
 
 	switch (boot_cpu_data.x86_model) {
+	case 45: /* Sandy Bridge-EP */
+		pci_uncores = snbep_pci_uncores;
+		uncore_pci_driver = &snbep_uncore_pci_driver;
+		snbep_pci2phy_map_init();
+		break;
 	default:
 		return 0;
 	}
@@ -1155,6 +1636,9 @@ static int __init uncore_cpu_init(void)
 	case 42: /* Sandy Bridge */
 		msr_uncores = snb_msr_uncores;
 		break;
+	case 45: /* Sandy Birdge-EP */
+		msr_uncores = snbep_msr_uncores;
+		break;
 	default:
 		return 0;
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index aa01df87b8de..4d52db0d1dfe 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -65,6 +65,92 @@
 #define NHM_UNC_PERFEVTSEL0                     0x3c0
 #define NHM_UNC_UNCORE_PMC0                     0x3b0
 
+/* SNB-EP Box level control */
+#define SNBEP_PMON_BOX_CTL_RST_CTRL	(1 << 0)
+#define SNBEP_PMON_BOX_CTL_RST_CTRS	(1 << 1)
+#define SNBEP_PMON_BOX_CTL_FRZ		(1 << 8)
+#define SNBEP_PMON_BOX_CTL_FRZ_EN	(1 << 16)
+#define SNBEP_PMON_BOX_CTL_INT		(SNBEP_PMON_BOX_CTL_RST_CTRL | \
+					 SNBEP_PMON_BOX_CTL_RST_CTRS | \
+					 SNBEP_PMON_BOX_CTL_FRZ_EN)
+/* SNB-EP event control */
+#define SNBEP_PMON_CTL_EV_SEL_MASK	0x000000ff
+#define SNBEP_PMON_CTL_UMASK_MASK	0x0000ff00
+#define SNBEP_PMON_CTL_RST		(1 << 17)
+#define SNBEP_PMON_CTL_EDGE_DET		(1 << 18)
+#define SNBEP_PMON_CTL_EV_SEL_EXT	(1 << 21)	/* only for QPI */
+#define SNBEP_PMON_CTL_EN		(1 << 22)
+#define SNBEP_PMON_CTL_INVERT		(1 << 23)
+#define SNBEP_PMON_CTL_TRESH_MASK	0xff000000
+#define SNBEP_PMON_RAW_EVENT_MASK	(SNBEP_PMON_CTL_EV_SEL_MASK | \
+					 SNBEP_PMON_CTL_UMASK_MASK | \
+					 SNBEP_PMON_CTL_EDGE_DET | \
+					 SNBEP_PMON_CTL_INVERT | \
+					 SNBEP_PMON_CTL_TRESH_MASK)
+
+/* SNB-EP Ubox event control */
+#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK		0x1f000000
+#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK		\
+				(SNBEP_PMON_CTL_EV_SEL_MASK | \
+				 SNBEP_PMON_CTL_UMASK_MASK | \
+				 SNBEP_PMON_CTL_EDGE_DET | \
+				 SNBEP_PMON_CTL_INVERT | \
+				 SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+
+/* SNB-EP PCU event control */
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK	0x0000c000
+#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK	0x1f000000
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT	(1 << 30)
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET	(1 << 31)
+#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK	\
+				(SNBEP_PMON_CTL_EV_SEL_MASK | \
+				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+				 SNBEP_PMON_CTL_EDGE_DET | \
+				 SNBEP_PMON_CTL_INVERT | \
+				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+				 SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+
+/* SNB-EP pci control register */
+#define SNBEP_PCI_PMON_BOX_CTL			0xf4
+#define SNBEP_PCI_PMON_CTL0			0xd8
+/* SNB-EP pci counter register */
+#define SNBEP_PCI_PMON_CTR0			0xa0
+
+/* SNB-EP home agent register */
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0	0x40
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1	0x44
+#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH	0x48
+/* SNB-EP memory controller register */
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL		0xf0
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR		0xd0
+/* SNB-EP QPI register */
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0		0x228
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1		0x22c
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0		0x238
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1		0x23c
+
+/* SNB-EP Ubox register */
+#define SNBEP_U_MSR_PMON_CTR0			0xc16
+#define SNBEP_U_MSR_PMON_CTL0			0xc10
+
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL		0xc08
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR		0xc09
+
+/* SNB-EP Cbo register */
+#define SNBEP_C0_MSR_PMON_CTR0			0xd16
+#define SNBEP_C0_MSR_PMON_CTL0			0xd10
+#define SNBEP_C0_MSR_PMON_BOX_FILTER		0xd14
+#define SNBEP_C0_MSR_PMON_BOX_CTL		0xd04
+#define SNBEP_CBO_MSR_OFFSET			0x20
+
+/* SNB-EP PCU register */
+#define SNBEP_PCU_MSR_PMON_CTR0			0xc36
+#define SNBEP_PCU_MSR_PMON_CTL0			0xc30
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER		0xc34
+#define SNBEP_PCU_MSR_PMON_BOX_CTL		0xc24
+#define SNBEP_PCU_MSR_CORE_C3_CTR		0x3fc
+#define SNBEP_PCU_MSR_CORE_C6_CTR		0x3fd
 
 struct intel_uncore_ops;
 struct intel_uncore_pmu;
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ab741b0d0074..5f187026b812 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2755,6 +2755,17 @@
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB7	0x3c27
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB8	0x3c2e
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB9	0x3c2f
+#define PCI_DEVICE_ID_INTEL_UNC_HA	0x3c46
+#define PCI_DEVICE_ID_INTEL_UNC_IMC0	0x3cb0
+#define PCI_DEVICE_ID_INTEL_UNC_IMC1	0x3cb1
+#define PCI_DEVICE_ID_INTEL_UNC_IMC2	0x3cb4
+#define PCI_DEVICE_ID_INTEL_UNC_IMC3	0x3cb5
+#define PCI_DEVICE_ID_INTEL_UNC_QPI0	0x3c41
+#define PCI_DEVICE_ID_INTEL_UNC_QPI1	0x3c42
+#define PCI_DEVICE_ID_INTEL_UNC_R2PCIE	0x3c43
+#define PCI_DEVICE_ID_INTEL_UNC_R3QPI0	0x3c44
+#define PCI_DEVICE_ID_INTEL_UNC_R3QPI1	0x3c45
+#define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX	0x3ce0
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
-- 
cgit v1.2.3


From 46010ab2607aed9484816a8f1679c2ec3c8e7dcf Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 15 Jun 2012 14:31:38 +0800
Subject: perf/tool: Use data struct for arg passing in event parse function

Moving all the bison arguments into the structure. In upcomming
patches we are going to:

  - add more arguments
  - reuse the grammer for term parsing

so it's more clear to pack/separate related arguments.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1339741902-8449-10-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/perf/util/parse-events.c | 16 +++++++------
 tools/perf/util/parse-events.h |  8 +++++--
 tools/perf/util/parse-events.y | 52 ++++++++++++++++++++++++++++--------------
 3 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 05dbc8b3c767..c71b29ad26ec 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -26,7 +26,7 @@ struct event_symbol {
 #ifdef PARSER_DEBUG
 extern int parse_events_debug;
 #endif
-int parse_events_parse(struct list_head *list, int *idx);
+int parse_events_parse(void *data);
 
 #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
 #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
@@ -789,25 +789,27 @@ int parse_events_modifier(struct list_head *list, char *str)
 
 int parse_events(struct perf_evlist *evlist, const char *str, int unset __used)
 {
-	LIST_HEAD(list);
-	LIST_HEAD(list_tmp);
+	struct parse_events_data__events data = {
+		.list = LIST_HEAD_INIT(data.list),
+		.idx  = evlist->nr_entries,
+	};
 	YY_BUFFER_STATE buffer;
-	int ret, idx = evlist->nr_entries;
+	int ret;
 
 	buffer = parse_events__scan_string(str);
 
 #ifdef PARSER_DEBUG
 	parse_events_debug = 1;
 #endif
-	ret = parse_events_parse(&list, &idx);
+	ret = parse_events_parse(&data);
 
 	parse_events__flush_buffer(buffer);
 	parse_events__delete_buffer(buffer);
 	parse_events_lex_destroy();
 
 	if (!ret) {
-		int entries = idx - evlist->nr_entries;
-		perf_evlist__splice_list_tail(evlist, &list, entries);
+		int entries = data.idx - evlist->nr_entries;
+		perf_evlist__splice_list_tail(evlist, &data.list, entries);
 		return 0;
 	}
 
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 8cac57ab4ee6..dc3c83a0ab8a 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -63,6 +63,11 @@ struct parse_events__term {
 	struct list_head list;
 };
 
+struct parse_events_data__events {
+	struct list_head list;
+	int idx;
+};
+
 int parse_events__is_hardcoded_term(struct parse_events__term *term);
 int parse_events__term_num(struct parse_events__term **_term,
 			   int type_term, char *config, long num);
@@ -83,8 +88,7 @@ int parse_events_add_pmu(struct list_head **list, int *idx,
 			 char *pmu , struct list_head *head_config);
 void parse_events_update_lists(struct list_head *list_event,
 			       struct list_head *list_all);
-void parse_events_error(struct list_head *list_all,
-			int *idx, char const *msg);
+void parse_events_error(void *data, char const *msg);
 int parse_events__test(void);
 
 void print_events(const char *event_glob);
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 362cc59332ae..e533bf72ba9c 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -1,7 +1,6 @@
 
 %name-prefix "parse_events_"
-%parse-param {struct list_head *list_all}
-%parse-param {int *idx}
+%parse-param {void *_data}
 
 %{
 
@@ -64,18 +63,22 @@ events ',' event | event
 event:
 event_def PE_MODIFIER_EVENT
 {
+	struct parse_events_data__events *data = _data;
+
 	/*
 	 * Apply modifier on all events added by single event definition
 	 * (there could be more events added for multiple tracepoint
 	 * definitions via '*?'.
 	 */
 	ABORT_ON(parse_events_modifier($1, $2));
-	parse_events_update_lists($1, list_all);
+	parse_events_update_lists($1, &data->list);
 }
 |
 event_def
 {
-	parse_events_update_lists($1, list_all);
+	struct parse_events_data__events *data = _data;
+
+	parse_events_update_lists($1, &data->list);
 }
 
 event_def: event_pmu |
@@ -89,9 +92,10 @@ event_def: event_pmu |
 event_pmu:
 PE_NAME '/' event_config '/'
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_pmu(&list, idx, $1, $3));
+	ABORT_ON(parse_events_add_pmu(&list, &data->idx, $1, $3));
 	parse_events__free_terms($3);
 	$$ = list;
 }
@@ -99,91 +103,106 @@ PE_NAME '/' event_config '/'
 event_legacy_symbol:
 PE_VALUE_SYM '/' event_config '/'
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 	int type = $1 >> 16;
 	int config = $1 & 255;
 
-	ABORT_ON(parse_events_add_numeric(&list, idx, type, config, $3));
+	ABORT_ON(parse_events_add_numeric(&list, &data->idx,
+					  type, config, $3));
 	parse_events__free_terms($3);
 	$$ = list;
 }
 |
 PE_VALUE_SYM sep_slash_dc
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 	int type = $1 >> 16;
 	int config = $1 & 255;
 
-	ABORT_ON(parse_events_add_numeric(&list, idx, type, config, NULL));
+	ABORT_ON(parse_events_add_numeric(&list, &data->idx,
+					  type, config, NULL));
 	$$ = list;
 }
 
 event_legacy_cache:
 PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_cache(&list, idx, $1, $3, $5));
+	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, $5));
 	$$ = list;
 }
 |
 PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_cache(&list, idx, $1, $3, NULL));
+	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, NULL));
 	$$ = list;
 }
 |
 PE_NAME_CACHE_TYPE
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_cache(&list, idx, $1, NULL, NULL));
+	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, NULL, NULL));
 	$$ = list;
 }
 
 event_legacy_mem:
 PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_breakpoint(&list, idx, (void *) $2, $4));
+	ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
+					     (void *) $2, $4));
 	$$ = list;
 }
 |
 PE_PREFIX_MEM PE_VALUE sep_dc
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_breakpoint(&list, idx, (void *) $2, NULL));
+	ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
+					     (void *) $2, NULL));
 	$$ = list;
 }
 
 event_legacy_tracepoint:
 PE_NAME ':' PE_NAME
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_tracepoint(&list, idx, $1, $3));
+	ABORT_ON(parse_events_add_tracepoint(&list, &data->idx, $1, $3));
 	$$ = list;
 }
 
 event_legacy_numeric:
 PE_VALUE ':' PE_VALUE
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_numeric(&list, idx, $1, $3, NULL));
+	ABORT_ON(parse_events_add_numeric(&list, &data->idx, $1, $3, NULL));
 	$$ = list;
 }
 
 event_legacy_raw:
 PE_RAW
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_numeric(&list, idx, PERF_TYPE_RAW, $1, NULL));
+	ABORT_ON(parse_events_add_numeric(&list, &data->idx,
+					  PERF_TYPE_RAW, $1, NULL));
 	$$ = list;
 }
 
@@ -267,8 +286,7 @@ sep_slash_dc: '/' | ':' |
 
 %%
 
-void parse_events_error(struct list_head *list_all __used,
-			int *idx __used,
+void parse_events_error(void *data __used,
 			char const *msg __used)
 {
 }
-- 
cgit v1.2.3


From ac20de6fff445d6deb0c44c25946d198f79f2f00 Mon Sep 17 00:00:00 2001
From: Zheng Yan <zheng.z.yan@intel.com>
Date: Fri, 15 Jun 2012 14:31:39 +0800
Subject: perf/tool: Make the event parser re-entrant

Make the event parser reentrant by creating separate
scanner for each parsing. The scanner is passed to the bison
as and argument to the lexer.

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
[ Cleaned up the patch. ]
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1339741902-8449-11-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/perf/util/parse-events.c |  35 +++++++++----
 tools/perf/util/parse-events.h |   2 +-
 tools/perf/util/parse-events.l | 116 ++++++++++++++++++++++++-----------------
 tools/perf/util/parse-events.y |   9 ++--
 4 files changed, 98 insertions(+), 64 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c71b29ad26ec..ca8665e19c0d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -11,6 +11,7 @@
 #include "cache.h"
 #include "header.h"
 #include "debugfs.h"
+#include "parse-events-bison.h"
 #include "parse-events-flex.h"
 #include "pmu.h"
 
@@ -26,7 +27,7 @@ struct event_symbol {
 #ifdef PARSER_DEBUG
 extern int parse_events_debug;
 #endif
-int parse_events_parse(void *data);
+int parse_events_parse(void *data, void *scanner);
 
 #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
 #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
@@ -787,26 +788,38 @@ int parse_events_modifier(struct list_head *list, char *str)
 	return 0;
 }
 
-int parse_events(struct perf_evlist *evlist, const char *str, int unset __used)
+static int parse_events__scanner(const char *str, void *data)
 {
-	struct parse_events_data__events data = {
-		.list = LIST_HEAD_INIT(data.list),
-		.idx  = evlist->nr_entries,
-	};
 	YY_BUFFER_STATE buffer;
+	void *scanner;
 	int ret;
 
-	buffer = parse_events__scan_string(str);
+	ret = parse_events_lex_init(&scanner);
+	if (ret)
+		return ret;
+
+	buffer = parse_events__scan_string(str, scanner);
 
 #ifdef PARSER_DEBUG
 	parse_events_debug = 1;
 #endif
-	ret = parse_events_parse(&data);
+	ret = parse_events_parse(data, scanner);
+
+	parse_events__flush_buffer(buffer, scanner);
+	parse_events__delete_buffer(buffer, scanner);
+	parse_events_lex_destroy(scanner);
+	return ret;
+}
 
-	parse_events__flush_buffer(buffer);
-	parse_events__delete_buffer(buffer);
-	parse_events_lex_destroy();
+int parse_events(struct perf_evlist *evlist, const char *str, int unset __used)
+{
+	struct parse_events_data__events data = {
+		.list = LIST_HEAD_INIT(data.list),
+		.idx  = evlist->nr_entries,
+	};
+	int ret;
 
+	ret = parse_events__scanner(str, &data);
 	if (!ret) {
 		int entries = data.idx - evlist->nr_entries;
 		perf_evlist__splice_list_tail(evlist, &data.list, entries);
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index dc3c83a0ab8a..fa2b19b862e2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -88,7 +88,7 @@ int parse_events_add_pmu(struct list_head **list, int *idx,
 			 char *pmu , struct list_head *head_config);
 void parse_events_update_lists(struct list_head *list_event,
 			       struct list_head *list_all);
-void parse_events_error(void *data, char const *msg);
+void parse_events_error(void *data, void *scanner, char const *msg);
 int parse_events__test(void);
 
 void print_events(const char *event_glob);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 618a8e788399..329794eea711 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -1,4 +1,6 @@
 
+%option reentrant
+%option bison-bridge
 %option prefix="parse_events_"
 %option stack
 
@@ -8,7 +10,10 @@
 #include "parse-events-bison.h"
 #include "parse-events.h"
 
-static int __value(char *str, int base, int token)
+char *parse_events_get_text(yyscan_t yyscanner);
+YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
+
+static int __value(YYSTYPE *yylval, char *str, int base, int token)
 {
 	long num;
 
@@ -17,35 +22,48 @@ static int __value(char *str, int base, int token)
 	if (errno)
 		return PE_ERROR;
 
-	parse_events_lval.num = num;
+	yylval->num = num;
 	return token;
 }
 
-static int value(int base)
+static int value(yyscan_t scanner, int base)
 {
-	return __value(parse_events_text, base, PE_VALUE);
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	return __value(yylval, text, base, PE_VALUE);
 }
 
-static int raw(void)
+static int raw(yyscan_t scanner)
 {
-	return __value(parse_events_text + 1, 16, PE_RAW);
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	return __value(yylval, text + 1, 16, PE_RAW);
 }
 
-static int str(int token)
+static int str(yyscan_t scanner, int token)
 {
-	parse_events_lval.str = strdup(parse_events_text);
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	yylval->str = strdup(text);
 	return token;
 }
 
-static int sym(int type, int config)
+static int sym(yyscan_t scanner, int type, int config)
 {
-	parse_events_lval.num = (type << 16) + config;
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+	yylval->num = (type << 16) + config;
 	return PE_VALUE_SYM;
 }
 
-static int term(int type)
+static int term(yyscan_t scanner, int type)
 {
-	parse_events_lval.num = type;
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+	yylval->num = type;
 	return PE_TERM;
 }
 
@@ -61,25 +79,25 @@ modifier_event	[ukhpGH]{1,8}
 modifier_bp	[rwx]
 
 %%
-cpu-cycles|cycles				{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
-stalled-cycles-frontend|idle-cycles-frontend	{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
-stalled-cycles-backend|idle-cycles-backend	{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
-instructions					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); }
-cache-references				{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); }
-cache-misses					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); }
-branch-instructions|branches			{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
-branch-misses					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); }
-bus-cycles					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); }
-ref-cycles					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); }
-cpu-clock					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); }
-task-clock					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); }
-page-faults|faults				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); }
-minor-faults					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
-major-faults					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
-context-switches|cs				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); }
-cpu-migrations|migrations			{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
-alignment-faults				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
-emulation-faults				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
+cpu-cycles|cycles				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches			{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); }
+cpu-clock					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); }
+task-clock					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); }
+page-faults|faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); }
+minor-faults					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
+major-faults					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
+context-switches|cs				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); }
+cpu-migrations|migrations			{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
+alignment-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
+emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
 
 L1-dcache|l1-d|l1d|L1-data		|
 L1-icache|l1-i|l1i|L1-instruction	|
@@ -87,14 +105,14 @@ LLC|L2					|
 dTLB|d-tlb|Data-TLB			|
 iTLB|i-tlb|Instruction-TLB		|
 branch|branches|bpu|btb|bpc		|
-node					{ return str(PE_NAME_CACHE_TYPE); }
+node					{ return str(yyscanner, PE_NAME_CACHE_TYPE); }
 
 load|loads|read				|
 store|stores|write			|
 prefetch|prefetches			|
 speculative-read|speculative-load	|
 refs|Reference|ops|access		|
-misses|miss				{ return str(PE_NAME_CACHE_OP_RESULT); }
+misses|miss				{ return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }
 
 	/*
 	 * These are event config hardcoded term names to be specified
@@ -102,20 +120,20 @@ misses|miss				{ return str(PE_NAME_CACHE_OP_RESULT); }
 	 * so we can put them here directly. In case the we have a conflict
 	 * in future, this needs to go into '//' condition block.
 	 */
-config			{ return term(PARSE_EVENTS__TERM_TYPE_CONFIG); }
-config1			{ return term(PARSE_EVENTS__TERM_TYPE_CONFIG1); }
-config2			{ return term(PARSE_EVENTS__TERM_TYPE_CONFIG2); }
-name			{ return term(PARSE_EVENTS__TERM_TYPE_NAME); }
-period			{ return term(PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
-branch_type		{ return term(PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
+config1			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
+config2			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
+name			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
+period			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
+branch_type		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
 
 mem:			{ BEGIN(mem); return PE_PREFIX_MEM; }
-r{num_raw_hex}		{ return raw(); }
-{num_dec}		{ return value(10); }
-{num_hex}		{ return value(16); }
+r{num_raw_hex}		{ return raw(yyscanner); }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
 
-{modifier_event}	{ return str(PE_MODIFIER_EVENT); }
-{name}			{ return str(PE_NAME); }
+{modifier_event}	{ return str(yyscanner, PE_MODIFIER_EVENT); }
+{name}			{ return str(yyscanner, PE_NAME); }
 "/"			{ return '/'; }
 -			{ return '-'; }
 ,			{ return ','; }
@@ -123,17 +141,17 @@ r{num_raw_hex}		{ return raw(); }
 =			{ return '='; }
 
 <mem>{
-{modifier_bp}		{ return str(PE_MODIFIER_BP); }
+{modifier_bp}		{ return str(yyscanner, PE_MODIFIER_BP); }
 :			{ return ':'; }
-{num_dec}		{ return value(10); }
-{num_hex}		{ return value(16); }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
 	/*
 	 * We need to separate 'mem:' scanner part, in order to get specific
 	 * modifier bits parsed out. Otherwise we would need to handle PE_NAME
 	 * and we'd need to parse it manually. During the escape from <mem>
 	 * state we need to put the escaping char back, so we dont miss it.
 	 */
-.			{ unput(*parse_events_text); BEGIN(INITIAL); }
+.			{ unput(*yytext); BEGIN(INITIAL); }
 	/*
 	 * We destroy the scanner after reaching EOF,
 	 * but anyway just to be sure get back to INIT state.
@@ -143,7 +161,7 @@ r{num_raw_hex}		{ return raw(); }
 
 %%
 
-int parse_events_wrap(void)
+int parse_events_wrap(void *scanner __used)
 {
 	return 1;
 }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index e533bf72ba9c..2a93d5c8ccda 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -1,6 +1,8 @@
-
+%pure-parser
 %name-prefix "parse_events_"
 %parse-param {void *_data}
+%parse-param {void *scanner}
+%lex-param {void* scanner}
 
 %{
 
@@ -11,8 +13,9 @@
 #include "types.h"
 #include "util.h"
 #include "parse-events.h"
+#include "parse-events-bison.h"
 
-extern int parse_events_lex (void);
+extern int parse_events_lex (YYSTYPE* lvalp, void* scanner);
 
 #define ABORT_ON(val) \
 do { \
@@ -286,7 +289,7 @@ sep_slash_dc: '/' | ':' |
 
 %%
 
-void parse_events_error(void *data __used,
+void parse_events_error(void *data __used, void *scanner __used,
 			char const *msg __used)
 {
 }
-- 
cgit v1.2.3


From 90e2b22dee908c13df256140a0d6527e3e8ea3f4 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 15 Jun 2012 14:31:40 +0800
Subject: perf/tool: Add support to reuse event grammar to parse out terms

We want to reuse the event grammar for parsing aliased terms.
The obvious reason is we dont need to add new code when there's
already support for this in event grammar.

Doing this by adding terms and event start entries into event
parse grammar. The grammar forks on the begining based on the
starting token, which is supplied via bison interface into the
lexer.  The lexer then returns the starting token as the first
token, thus making the grammar switch accordingly.

Currently 2 starting tokens/grammars are supported:

	PE_START_TERMS, PE_START_EVENTS

The PE_START_TERMS related grammar uses 'event_config' part
of the grammar for term parsing.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1339741902-8449-12-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/perf/util/parse-events.c | 28 +++++++++++++++++++++++++---
 tools/perf/util/parse-events.h |  5 +++++
 tools/perf/util/parse-events.l | 13 +++++++++++++
 tools/perf/util/parse-events.y | 12 ++++++++++++
 4 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index ca8665e19c0d..d002170adb3f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -12,6 +12,7 @@
 #include "header.h"
 #include "debugfs.h"
 #include "parse-events-bison.h"
+#define YY_EXTRA_TYPE int
 #include "parse-events-flex.h"
 #include "pmu.h"
 
@@ -788,13 +789,13 @@ int parse_events_modifier(struct list_head *list, char *str)
 	return 0;
 }
 
-static int parse_events__scanner(const char *str, void *data)
+static int parse_events__scanner(const char *str, void *data, int start_token)
 {
 	YY_BUFFER_STATE buffer;
 	void *scanner;
 	int ret;
 
-	ret = parse_events_lex_init(&scanner);
+	ret = parse_events_lex_init_extra(start_token, &scanner);
 	if (ret)
 		return ret;
 
@@ -811,6 +812,27 @@ static int parse_events__scanner(const char *str, void *data)
 	return ret;
 }
 
+/*
+ * parse event config string, return a list of event terms.
+ */
+int parse_events_terms(struct list_head *terms, const char *str)
+{
+	struct parse_events_data__terms data = {
+		.terms = NULL,
+	};
+	int ret;
+
+	ret = parse_events__scanner(str, &data, PE_START_TERMS);
+	if (!ret) {
+		list_splice(data.terms, terms);
+		free(data.terms);
+		return 0;
+	}
+
+	parse_events__free_terms(data.terms);
+	return ret;
+}
+
 int parse_events(struct perf_evlist *evlist, const char *str, int unset __used)
 {
 	struct parse_events_data__events data = {
@@ -819,7 +841,7 @@ int parse_events(struct perf_evlist *evlist, const char *str, int unset __used)
 	};
 	int ret;
 
-	ret = parse_events__scanner(str, &data);
+	ret = parse_events__scanner(str, &data, PE_START_EVENTS);
 	if (!ret) {
 		int entries = data.idx - evlist->nr_entries;
 		perf_evlist__splice_list_tail(evlist, &data.list, entries);
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index fa2b19b862e2..9896edadbe62 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -33,6 +33,7 @@ extern int parse_events_option(const struct option *opt, const char *str,
 			       int unset);
 extern int parse_events(struct perf_evlist *evlist, const char *str,
 			int unset);
+extern int parse_events_terms(struct list_head *terms, const char *str);
 extern int parse_filter(const struct option *opt, const char *str, int unset);
 
 #define EVENTS_HELP_MAX (128*1024)
@@ -68,6 +69,10 @@ struct parse_events_data__events {
 	int idx;
 };
 
+struct parse_events_data__terms {
+	struct list_head *terms;
+};
+
 int parse_events__is_hardcoded_term(struct parse_events__term *term);
 int parse_events__term_num(struct parse_events__term **_term,
 			   int type_term, char *config, long num);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 329794eea711..488362e14133 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -79,6 +79,19 @@ modifier_event	[ukhpGH]{1,8}
 modifier_bp	[rwx]
 
 %%
+
+%{
+	{
+		int start_token;
+
+		start_token = (int) parse_events_get_extra(yyscanner);
+		if (start_token) {
+			parse_events_set_extra(NULL, yyscanner);
+			return start_token;
+		}
+         }
+%}
+
 cpu-cycles|cycles				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
 stalled-cycles-frontend|idle-cycles-frontend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
 stalled-cycles-backend|idle-cycles-backend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 2a93d5c8ccda..9525c455d27f 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -25,6 +25,7 @@ do { \
 
 %}
 
+%token PE_START_EVENTS PE_START_TERMS
 %token PE_VALUE PE_VALUE_SYM PE_RAW PE_TERM
 %token PE_NAME
 %token PE_MODIFIER_EVENT PE_MODIFIER_BP
@@ -60,6 +61,11 @@ do { \
 }
 %%
 
+start:
+PE_START_EVENTS events
+|
+PE_START_TERMS  terms
+
 events:
 events ',' event | event
 
@@ -209,6 +215,12 @@ PE_RAW
 	$$ = list;
 }
 
+terms: event_config
+{
+	struct parse_events_data__terms *data = _data;
+	data->terms = $1;
+}
+
 event_config:
 event_config ',' event_term
 {
-- 
cgit v1.2.3


From a6146d5040cce560f700221158d77dd335eed332 Mon Sep 17 00:00:00 2001
From: Zheng Yan <zheng.z.yan@intel.com>
Date: Fri, 15 Jun 2012 14:31:41 +0800
Subject: perf/tool: Add PMU event alias support

Add support to specify alias term within the event description.

The definition of pmu event alias is located at:

  ${sysfs_mount}/bus/event_source/devices/${pmu}/events/

Each file in the 'events' directory defines a event alias. Its contents
are like:

  config=1,config1=2

Using pmu event alias, an event can be now specified like:

  uncore/CLOCKTICKS/ or uncore/event=CLOCKTICKS/

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
[ Cleaned it up. ]
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1339741902-8449-13-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/perf/util/parse-events.c |  10 +++
 tools/perf/util/parse-events.h |   2 +
 tools/perf/util/pmu.c          | 166 +++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/pmu.h          |  11 ++-
 4 files changed, 188 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index d002170adb3f..3339424cc421 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -701,6 +701,9 @@ int parse_events_add_pmu(struct list_head **list, int *idx,
 
 	memset(&attr, 0, sizeof(attr));
 
+	if (perf_pmu__check_alias(pmu, head_config))
+		return -EINVAL;
+
 	/*
 	 * Configure hardcoded terms first, no need to check
 	 * return value when called with fail == 0 ;)
@@ -1143,6 +1146,13 @@ int parse_events__term_str(struct parse_events__term **term,
 			config, str, 0);
 }
 
+int parse_events__term_clone(struct parse_events__term **new,
+			     struct parse_events__term *term)
+{
+	return new_term(new, term->type_val, term->type_term, term->config,
+			term->val.str, term->val.num);
+}
+
 void parse_events__free_terms(struct list_head *terms)
 {
 	struct parse_events__term *term, *h;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 9896edadbe62..a2c71684f6a4 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -78,6 +78,8 @@ int parse_events__term_num(struct parse_events__term **_term,
 			   int type_term, char *config, long num);
 int parse_events__term_str(struct parse_events__term **_term,
 			   int type_term, char *config, char *str);
+int parse_events__term_clone(struct parse_events__term **new,
+			     struct parse_events__term *term);
 void parse_events__free_terms(struct list_head *terms);
 int parse_events_modifier(struct list_head *list, char *str);
 int parse_events_add_tracepoint(struct list_head **list, int *idx,
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index a119a5371699..74d0948ec368 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -80,6 +80,114 @@ static int pmu_format(char *name, struct list_head *format)
 	return 0;
 }
 
+static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file)
+{
+	struct perf_pmu__alias *alias;
+	char buf[256];
+	int ret;
+
+	ret = fread(buf, 1, sizeof(buf), file);
+	if (ret == 0)
+		return -EINVAL;
+	buf[ret] = 0;
+
+	alias = malloc(sizeof(*alias));
+	if (!alias)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&alias->terms);
+	ret = parse_events_terms(&alias->terms, buf);
+	if (ret) {
+		free(alias);
+		return ret;
+	}
+
+	alias->name = strdup(name);
+	list_add_tail(&alias->list, list);
+	return 0;
+}
+
+/*
+ * Process all the sysfs attributes located under the directory
+ * specified in 'dir' parameter.
+ */
+static int pmu_aliases_parse(char *dir, struct list_head *head)
+{
+	struct dirent *evt_ent;
+	DIR *event_dir;
+	int ret = 0;
+
+	event_dir = opendir(dir);
+	if (!event_dir)
+		return -EINVAL;
+
+	while (!ret && (evt_ent = readdir(event_dir))) {
+		char path[PATH_MAX];
+		char *name = evt_ent->d_name;
+		FILE *file;
+
+		if (!strcmp(name, ".") || !strcmp(name, ".."))
+			continue;
+
+		snprintf(path, PATH_MAX, "%s/%s", dir, name);
+
+		ret = -EINVAL;
+		file = fopen(path, "r");
+		if (!file)
+			break;
+		ret = perf_pmu__new_alias(head, name, file);
+		fclose(file);
+	}
+
+	closedir(event_dir);
+	return ret;
+}
+
+/*
+ * Reading the pmu event aliases definition, which should be located at:
+ * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes.
+ */
+static int pmu_aliases(char *name, struct list_head *head)
+{
+	struct stat st;
+	char path[PATH_MAX];
+	const char *sysfs;
+
+	sysfs = sysfs_find_mountpoint();
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		 "%s/bus/event_source/devices/%s/events", sysfs, name);
+
+	if (stat(path, &st) < 0)
+		return -1;
+
+	if (pmu_aliases_parse(path, head))
+		return -1;
+
+	return 0;
+}
+
+static int pmu_alias_terms(struct perf_pmu__alias *alias,
+			   struct list_head *terms)
+{
+	struct parse_events__term *term, *clone;
+	LIST_HEAD(list);
+	int ret;
+
+	list_for_each_entry(term, &alias->terms, list) {
+		ret = parse_events__term_clone(&clone, term);
+		if (ret) {
+			parse_events__free_terms(&list);
+			return ret;
+		}
+		list_add_tail(&clone->list, &list);
+	}
+	list_splice(&list, terms);
+	return 0;
+}
+
 /*
  * Reading/parsing the default pmu type value, which should be
  * located at:
@@ -118,6 +226,7 @@ static struct perf_pmu *pmu_lookup(char *name)
 {
 	struct perf_pmu *pmu;
 	LIST_HEAD(format);
+	LIST_HEAD(aliases);
 	__u32 type;
 
 	/*
@@ -135,8 +244,12 @@ static struct perf_pmu *pmu_lookup(char *name)
 	if (!pmu)
 		return NULL;
 
+	pmu_aliases(name, &aliases);
+
 	INIT_LIST_HEAD(&pmu->format);
+	INIT_LIST_HEAD(&pmu->aliases);
 	list_splice(&format, &pmu->format);
+	list_splice(&aliases, &pmu->aliases);
 	pmu->name = strdup(name);
 	pmu->type = type;
 	return pmu;
@@ -279,6 +392,59 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
 	return pmu_config(&pmu->format, attr, head_terms);
 }
 
+static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu,
+					      struct parse_events__term *term)
+{
+	struct perf_pmu__alias *alias;
+	char *name;
+
+	if (parse_events__is_hardcoded_term(term))
+		return NULL;
+
+	if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+		if (term->val.num != 1)
+			return NULL;
+		if (pmu_find_format(&pmu->format, term->config))
+			return NULL;
+		name = term->config;
+	} else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+		if (strcasecmp(term->config, "event"))
+			return NULL;
+		name = term->val.str;
+	} else {
+		return NULL;
+	}
+
+	list_for_each_entry(alias, &pmu->aliases, list) {
+		if (!strcasecmp(alias->name, name))
+			return alias;
+	}
+	return NULL;
+}
+
+/*
+ * Find alias in the terms list and replace it with the terms
+ * defined for the alias
+ */
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms)
+{
+	struct parse_events__term *term, *h;
+	struct perf_pmu__alias *alias;
+	int ret;
+
+	list_for_each_entry_safe(term, h, head_terms, list) {
+		alias = pmu_find_alias(pmu, term);
+		if (!alias)
+			continue;
+		ret = pmu_alias_terms(alias, &term->list);
+		if (ret)
+			return ret;
+		list_del(&term->list);
+		free(term);
+	}
+	return 0;
+}
+
 int perf_pmu__new_format(struct list_head *list, char *name,
 			 int config, unsigned long *bits)
 {
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 68c0db965e1f..535f2c5258ab 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -19,17 +19,26 @@ struct perf_pmu__format {
 	struct list_head list;
 };
 
+struct perf_pmu__alias {
+	char *name;
+	struct list_head terms;
+	struct list_head list;
+};
+
 struct perf_pmu {
 	char *name;
 	__u32 type;
 	struct list_head format;
+	struct list_head aliases;
 	struct list_head list;
 };
 
 struct perf_pmu *perf_pmu__find(char *name);
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
 		     struct list_head *head_terms);
-
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms);
+struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
+				struct list_head *head_terms);
 int perf_pmu_wrap(void);
 void perf_pmu_error(struct list_head *list, char *name, char const *msg);
 
-- 
cgit v1.2.3


From 4429392e1484319df4209d41a98244c76d0caf56 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 15 Jun 2012 14:31:42 +0800
Subject: perf/tool: Add automated test for pure terms parsing

Adding automated test for parsing terms out of the event grammar.
Also slightly changing current event parsing test functions to
follow up more generic namespace.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1339741902-8449-14-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/perf/util/parse-events-test.c | 122 ++++++++++++++++++++++++++++++++++--
 1 file changed, 117 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c
index 76b98e2a587d..af1039cdb853 100644
--- a/tools/perf/util/parse-events-test.c
+++ b/tools/perf/util/parse-events-test.c
@@ -430,6 +430,49 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
 	return 0;
 }
 
+static int test__checkterms_simple(struct list_head *terms)
+{
+	struct parse_events__term *term;
+
+	/* config=10 */
+	term = list_entry(terms->next, struct parse_events__term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 10);
+	TEST_ASSERT_VAL("wrong config", !term->config);
+
+	/* config1 */
+	term = list_entry(term->list.next, struct parse_events__term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+	TEST_ASSERT_VAL("wrong config", !term->config);
+
+	/* config2=3 */
+	term = list_entry(term->list.next, struct parse_events__term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 3);
+	TEST_ASSERT_VAL("wrong config", !term->config);
+
+	/* umask=1*/
+	term = list_entry(term->list.next, struct parse_events__term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+	TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask"));
+
+	return 0;
+}
+
 struct test__event_st {
 	const char *name;
 	__u32 type;
@@ -559,7 +602,23 @@ static struct test__event_st test__events_pmu[] = {
 #define TEST__EVENTS_PMU_CNT (sizeof(test__events_pmu) / \
 			      sizeof(struct test__event_st))
 
-static int test(struct test__event_st *e)
+struct test__term {
+	const char *str;
+	__u32 type;
+	int (*check)(struct list_head *terms);
+};
+
+static struct test__term test__terms[] = {
+	[0] = {
+		.str   = "config=10,config1,config2=3,umask=1",
+		.check = test__checkterms_simple,
+	},
+};
+
+#define TEST__TERMS_CNT (sizeof(test__terms) / \
+			 sizeof(struct test__term))
+
+static int test_event(struct test__event_st *e)
 {
 	struct perf_evlist *evlist;
 	int ret;
@@ -590,7 +649,48 @@ static int test_events(struct test__event_st *events, unsigned cnt)
 		struct test__event_st *e = &events[i];
 
 		pr_debug("running test %d '%s'\n", i, e->name);
-		ret = test(e);
+		ret = test_event(e);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static int test_term(struct test__term *t)
+{
+	struct list_head *terms;
+	int ret;
+
+	terms = malloc(sizeof(*terms));
+	if (!terms)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(terms);
+
+	ret = parse_events_terms(terms, t->str);
+	if (ret) {
+		pr_debug("failed to parse terms '%s', err %d\n",
+			 t->str , ret);
+		return ret;
+	}
+
+	ret = t->check(terms);
+	parse_events__free_terms(terms);
+
+	return ret;
+}
+
+static int test_terms(struct test__term *terms, unsigned cnt)
+{
+	int ret = 0;
+	unsigned i;
+
+	for (i = 0; i < cnt; i++) {
+		struct test__term *t = &terms[i];
+
+		pr_debug("running test %d '%s'\n", i, t->str);
+		ret = test_term(t);
 		if (ret)
 			break;
 	}
@@ -617,9 +717,21 @@ int parse_events__test(void)
 {
 	int ret;
 
-	ret = test_events(test__events, TEST__EVENTS_CNT);
-	if (!ret && test_pmu())
-		ret = test_events(test__events_pmu, TEST__EVENTS_PMU_CNT);
+	do {
+		ret = test_events(test__events, TEST__EVENTS_CNT);
+		if (ret)
+			break;
+
+		if (test_pmu()) {
+			ret = test_events(test__events_pmu,
+					  TEST__EVENTS_PMU_CNT);
+			if (ret)
+				break;
+		}
+
+		ret = test_terms(test__terms, TEST__TERMS_CNT);
+
+	} while (0);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 2992c542fcd40777ed253f57362c65711fb8acaf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 16 Jun 2012 14:45:49 +0200
Subject: perf/x86: Lowercase uncore PMU event names

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-ucnds8gkve4x3s4biuukyph3@git.kernel.org
[ Trivial build fix ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 51 +++++++++------------------
 1 file changed, 16 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index d34f68bf990b..28a8413ca199 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -179,22 +179,17 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = {
 };
 
 static struct uncore_event_desc snbep_uncore_imc_events[] = {
-	INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "config=0xffff"),
-	/* read */
-	INTEL_UNCORE_EVENT_DESC(CAS_COUNT_RD, "event=0x4,umask=0x3"),
-	/* write */
-	INTEL_UNCORE_EVENT_DESC(CAS_COUNT_WR, "event=0x4,umask=0xc"),
+	INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0xff"),
+	INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
+	INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
 	{ /* end: all zeroes */ },
 };
 
 static struct uncore_event_desc snbep_uncore_qpi_events[] = {
-	INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "event=0x14"),
-	/* outgoing data+nondata flits */
-	INTEL_UNCORE_EVENT_DESC(TxL_FLITS_ACTIVE, "event=0x0,umask=0x6"),
-	/* DRS data received */
-	INTEL_UNCORE_EVENT_DESC(DRS_DATA, "event=0x2,umask=0x8"),
-	/* NCB data received */
-	INTEL_UNCORE_EVENT_DESC(NCB_DATA, "event=0x3,umask=0x4"),
+	INTEL_UNCORE_EVENT_DESC(clockticks,       "event=0x14"),
+	INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
+	INTEL_UNCORE_EVENT_DESC(drs_data,         "event=0x02,umask=0x08"),
+	INTEL_UNCORE_EVENT_DESC(ncb_data,         "event=0x03,umask=0x04"),
 	{ /* end: all zeroes */ },
 };
 
@@ -621,29 +616,15 @@ static struct attribute_group nhm_uncore_format_group = {
 };
 
 static struct uncore_event_desc nhm_uncore_events[] = {
-	INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "config=0xffff"),
-	/* full cache line writes to DRAM */
-	INTEL_UNCORE_EVENT_DESC(QMC_WRITES_FULL_ANY, "event=0x2f,umask=0xf"),
-	/* Quickpath Memory Controller normal priority read requests */
-	INTEL_UNCORE_EVENT_DESC(QMC_NORMAL_READS_ANY, "event=0x2c,umask=0xf"),
-	/* Quickpath Home Logic read requests from the IOH */
-	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_READS,
-				"event=0x20,umask=0x1"),
-	/* Quickpath Home Logic write requests from the IOH */
-	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_WRITES,
-				"event=0x20,umask=0x2"),
-	/* Quickpath Home Logic read requests from a remote socket */
-	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_READS,
-				"event=0x20,umask=0x4"),
-	/* Quickpath Home Logic write requests from a remote socket */
-	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_WRITES,
-				"event=0x20,umask=0x8"),
-	/* Quickpath Home Logic read requests from the local socket */
-	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_READS,
-				"event=0x20,umask=0x10"),
-	/* Quickpath Home Logic write requests from the local socket */
-	INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_WRITES,
-				"event=0x20,umask=0x20"),
+	INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0xff"),
+	INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"),
+	INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes,    "event=0x20,umask=0x02"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads,  "event=0x20,umask=0x04"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads,   "event=0x20,umask=0x10"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes,  "event=0x20,umask=0x20"),
 	{ /* end: all zeroes */ },
 };
 
-- 
cgit v1.2.3


From 409a8be61560c5875816da6dcb0c575d78145a5c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 30 May 2012 10:33:24 -0300
Subject: perf tools: Add sort by src line/number

Using addr2line for now, requires debuginfo, needs more work to support
detached debuginfo, aka foo-debuginfo packages.

Example:

	[root@sandy ~]# perf record -a sleep 3
	[ perf record: Woken up 1 times to write data ]
	[ perf record: Captured and wrote 0.555 MB perf.data (~24236 samples) ]
	[root@sandy ~]# perf report -s dso,srcline 2>&1 | grep -v ^# | head -5
	    22.41%  [kernel.kallsyms]  /home/git/linux/drivers/idle/intel_idle.c:280
	     4.79%  [kernel.kallsyms]  /home/git/linux/drivers/cpuidle/cpuidle.c:148
	     4.78%  [kernel.kallsyms]  /home/git/linux/arch/x86/include/asm/atomic64_64.h:121
	     4.49%  [kernel.kallsyms]  /home/git/linux/kernel/sched/core.c:1690
	     4.30%  [kernel.kallsyms]  /home/git/linux/include/linux/seqlock.h:90
	[root@sandy ~]#

[root@sandy ~]# perf top -U -s dso,symbol,srcline
Samples: 1K of event 'cycles', Event count (approx.): 589617389
 18.66%  [kernel]  [k] copy_user_generic_unrolled   /home/git/linux/arch/x86/lib/copy_user_64.S:143
  7.83%  [kernel]  [k] clear_page                   /home/git/linux/arch/x86/lib/clear_page_64.S:39
  6.59%  [kernel]  [k] clear_page                   /home/git/linux/arch/x86/lib/clear_page_64.S:38
  3.66%  [kernel]  [k] page_fault                   /home/git/linux/arch/x86/kernel/entry_64.S:1379
  3.25%  [kernel]  [k] clear_page                   /home/git/linux/arch/x86/lib/clear_page_64.S:40
  3.12%  [kernel]  [k] clear_page                   /home/git/linux/arch/x86/lib/clear_page_64.S:37
  2.74%  [kernel]  [k] clear_page                   /home/git/linux/arch/x86/lib/clear_page_64.S:36
  2.39%  [kernel]  [k] clear_page                   /home/git/linux/arch/x86/lib/clear_page_64.S:43
  2.12%  [kernel]  [k] ioread32                     /home/git/linux/lib/iomap.c:90
  1.51%  [kernel]  [k] copy_user_generic_unrolled   /home/git/linux/arch/x86/lib/copy_user_64.S:144
  1.19%  [kernel]  [k] copy_user_generic_unrolled   /home/git/linux/arch/x86/lib/copy_user_64.S:154

Suggested-by: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-pdmqbng9twz06jzkbgtuwbp8@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt |  2 +-
 tools/perf/Documentation/perf-top.txt    |  2 +-
 tools/perf/util/hist.h                   |  1 +
 tools/perf/util/sort.c                   | 49 ++++++++++++++++++++++++++++++++
 tools/perf/util/sort.h                   |  2 ++
 5 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 2d89f02719b5..495210a612c4 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -57,7 +57,7 @@ OPTIONS
 
 -s::
 --sort=::
-	Sort by key(s): pid, comm, dso, symbol, parent.
+	Sort by key(s): pid, comm, dso, symbol, parent, srcline.
 
 -p::
 --parent=<regex>::
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 4a5680cb242e..5b80d84d6b4a 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -112,7 +112,7 @@ Default is to monitor all CPUS.
 
 -s::
 --sort::
-	Sort by key(s): pid, comm, dso, symbol, parent
+	Sort by key(s): pid, comm, dso, symbol, parent, srcline.
 
 -n::
 --show-nr-samples::
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 34bb556d6219..0b096c27a419 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -47,6 +47,7 @@ enum hist_column {
 	HISTC_SYMBOL_TO,
 	HISTC_DSO_FROM,
 	HISTC_DSO_TO,
+	HISTC_SRCLINE,
 	HISTC_NR_COLS, /* Last entry */
 };
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index a27237430c5f..0f5a0a496bc4 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -241,6 +241,54 @@ struct sort_entry sort_sym = {
 	.se_width_idx	= HISTC_SYMBOL,
 };
 
+/* --sort srcline */
+
+static int64_t
+sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return (int64_t)(right->ip - left->ip);
+}
+
+static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
+				   size_t size, unsigned int width __used)
+{
+	FILE *fp;
+	char cmd[PATH_MAX + 2], *path = self->srcline, *nl;
+	size_t line_len;
+
+	if (path != NULL)
+		goto out_path;
+
+	snprintf(cmd, sizeof(cmd), "addr2line -e %s %016" PRIx64,
+		 self->ms.map->dso->long_name, self->ip);
+	fp = popen(cmd, "r");
+	if (!fp)
+		goto out_ip;
+
+	if (getline(&path, &line_len, fp) < 0 || !line_len)
+		goto out_ip;
+	fclose(fp);
+	self->srcline = strdup(path);
+	if (self->srcline == NULL)
+		goto out_ip;
+
+	nl = strchr(self->srcline, '\n');
+	if (nl != NULL)
+		*nl = '\0';
+	path = self->srcline;
+out_path:
+	return repsep_snprintf(bf, size, "%s", path);
+out_ip:
+	return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip);
+}
+
+struct sort_entry sort_srcline = {
+	.se_header	= "Source:Line",
+	.se_cmp		= sort__srcline_cmp,
+	.se_snprintf	= hist_entry__srcline_snprintf,
+	.se_width_idx	= HISTC_SRCLINE,
+};
+
 /* --sort parent */
 
 static int64_t
@@ -439,6 +487,7 @@ static struct sort_dimension sort_dimensions[] = {
 	DIM(SORT_PARENT, "parent", sort_parent),
 	DIM(SORT_CPU, "cpu", sort_cpu),
 	DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
+	DIM(SORT_SRCLINE, "srcline", sort_srcline),
 };
 
 int sort_dimension__add(const char *tok)
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 472aa5a63a58..e724b26acd51 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -71,6 +71,7 @@ struct hist_entry {
 	char			level;
 	bool			used;
 	u8			filtered;
+	char			*srcline;
 	struct symbol		*parent;
 	union {
 		unsigned long	  position;
@@ -93,6 +94,7 @@ enum sort_type {
 	SORT_SYM_FROM,
 	SORT_SYM_TO,
 	SORT_MISPREDICT,
+	SORT_SRCLINE,
 };
 
 /*
-- 
cgit v1.2.3


From ba47a142d9f9b84e0464a11b7a067e5ad95c5d4b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 29 May 2012 13:22:58 +0900
Subject: perf ui: Introduce struct perf_error_ops

The struct perf_error_ops is for flexible error logging.

We can register appropriate functions based on front-end.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338265382-6872-4-git-send-email-namhyung@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile       |   3 +
 tools/perf/ui/gtk/util.c  |  20 ++++
 tools/perf/ui/tui/setup.c |   6 +
 tools/perf/ui/tui/util.c  | 243 ++++++++++++++++++++++++++++++++++++++++
 tools/perf/ui/util.c      | 277 ++++++++++------------------------------------
 tools/perf/ui/util.h      |   9 +-
 tools/perf/util/debug.c   |   2 +-
 tools/perf/util/debug.h   |  23 +++-
 8 files changed, 357 insertions(+), 226 deletions(-)
 create mode 100644 tools/perf/ui/gtk/util.c
 create mode 100644 tools/perf/ui/tui/util.c

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 0eee64cfe9a0..c0ee917ae8ab 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -503,6 +503,7 @@ else
 		LIB_OBJS += $(OUTPUT)ui/progress.o
 		LIB_OBJS += $(OUTPUT)ui/util.o
 		LIB_OBJS += $(OUTPUT)ui/tui/setup.o
+		LIB_OBJS += $(OUTPUT)ui/tui/util.o
 		LIB_H += ui/browser.h
 		LIB_H += ui/browsers/map.h
 		LIB_H += ui/helpline.h
@@ -526,9 +527,11 @@ else
 		EXTLIBS += $(shell pkg-config --libs gtk+-2.0)
 		LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
 		LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
+		LIB_OBJS += $(OUTPUT)ui/gtk/util.o
 		# Make sure that it'd be included only once.
 		ifneq ($(findstring -DNO_NEWT_SUPPORT,$(BASIC_CFLAGS)),)
 			LIB_OBJS += $(OUTPUT)ui/setup.o
+			LIB_OBJS += $(OUTPUT)ui/util.o
 		endif
 	endif
 endif
diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c
new file mode 100644
index 000000000000..a727fe394e91
--- /dev/null
+++ b/tools/perf/ui/gtk/util.c
@@ -0,0 +1,20 @@
+#include "../util.h"
+#include "../../util/debug.h"
+#include "gtk.h"
+
+
+/*
+ * FIXME: Functions below should be implemented properly.
+ *        For now, just add stubs for NO_NEWT=1 build.
+ */
+#ifdef NO_NEWT_SUPPORT
+int ui_helpline__show_help(const char *format __used, va_list ap __used)
+{
+	return 0;
+}
+
+void ui_progress__update(u64 curr __used, u64 total __used,
+			 const char *title __used)
+{
+}
+#endif
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index d33e943ac434..e813c1d17346 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -15,6 +15,8 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
 
 static volatile int ui__need_resize;
 
+extern struct perf_error_ops perf_tui_eops;
+
 void ui__refresh_dimensions(bool force)
 {
 	if (force || ui__need_resize) {
@@ -122,6 +124,8 @@ int ui__init(void)
 	signal(SIGINT, ui__signal);
 	signal(SIGQUIT, ui__signal);
 	signal(SIGTERM, ui__signal);
+
+	perf_error__register(&perf_tui_eops);
 out:
 	return err;
 }
@@ -137,4 +141,6 @@ void ui__exit(bool wait_for_ok)
 	SLsmg_refresh();
 	SLsmg_reset_smg();
 	SLang_reset_tty();
+
+	perf_error__unregister(&perf_tui_eops);
 }
diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c
new file mode 100644
index 000000000000..092902e30cee
--- /dev/null
+++ b/tools/perf/ui/tui/util.c
@@ -0,0 +1,243 @@
+#include "../../util/util.h"
+#include <signal.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/ttydefaults.h>
+
+#include "../../util/cache.h"
+#include "../../util/debug.h"
+#include "../browser.h"
+#include "../keysyms.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../util.h"
+#include "../libslang.h"
+
+static void ui_browser__argv_write(struct ui_browser *browser,
+				   void *entry, int row)
+{
+	char **arg = entry;
+	bool current_entry = ui_browser__is_current_entry(browser, row);
+
+	ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
+						       HE_COLORSET_NORMAL);
+	slsmg_write_nstring(*arg, browser->width);
+}
+
+static int popup_menu__run(struct ui_browser *menu)
+{
+	int key;
+
+	if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0)
+		return -1;
+
+	while (1) {
+		key = ui_browser__run(menu, 0);
+
+		switch (key) {
+		case K_RIGHT:
+		case K_ENTER:
+			key = menu->index;
+			break;
+		case K_LEFT:
+		case K_ESC:
+		case 'q':
+		case CTRL('c'):
+			key = -1;
+			break;
+		default:
+			continue;
+		}
+
+		break;
+	}
+
+	ui_browser__hide(menu);
+	return key;
+}
+
+int ui__popup_menu(int argc, char * const argv[])
+{
+	struct ui_browser menu = {
+		.entries    = (void *)argv,
+		.refresh    = ui_browser__argv_refresh,
+		.seek	    = ui_browser__argv_seek,
+		.write	    = ui_browser__argv_write,
+		.nr_entries = argc,
+	};
+
+	return popup_menu__run(&menu);
+}
+
+int ui_browser__input_window(const char *title, const char *text, char *input,
+			     const char *exit_msg, int delay_secs)
+{
+	int x, y, len, key;
+	int max_len = 60, nr_lines = 0;
+	static char buf[50];
+	const char *t;
+
+	t = text;
+	while (1) {
+		const char *sep = strchr(t, '\n');
+
+		if (sep == NULL)
+			sep = strchr(t, '\0');
+		len = sep - t;
+		if (max_len < len)
+			max_len = len;
+		++nr_lines;
+		if (*sep == '\0')
+			break;
+		t = sep + 1;
+	}
+
+	max_len += 2;
+	nr_lines += 8;
+	y = SLtt_Screen_Rows / 2 - nr_lines / 2;
+	x = SLtt_Screen_Cols / 2 - max_len / 2;
+
+	SLsmg_set_color(0);
+	SLsmg_draw_box(y, x++, nr_lines, max_len);
+	if (title) {
+		SLsmg_gotorc(y, x + 1);
+		SLsmg_write_string((char *)title);
+	}
+	SLsmg_gotorc(++y, x);
+	nr_lines -= 7;
+	max_len -= 2;
+	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
+				   nr_lines, max_len, 1);
+	y += nr_lines;
+	len = 5;
+	while (len--) {
+		SLsmg_gotorc(y + len - 1, x);
+		SLsmg_write_nstring((char *)" ", max_len);
+	}
+	SLsmg_draw_box(y++, x + 1, 3, max_len - 2);
+
+	SLsmg_gotorc(y + 3, x);
+	SLsmg_write_nstring((char *)exit_msg, max_len);
+	SLsmg_refresh();
+
+	x += 2;
+	len = 0;
+	key = ui__getch(delay_secs);
+	while (key != K_TIMER && key != K_ENTER && key != K_ESC) {
+		if (key == K_BKSPC) {
+			if (len == 0)
+				goto next_key;
+			SLsmg_gotorc(y, x + --len);
+			SLsmg_write_char(' ');
+		} else {
+			buf[len] = key;
+			SLsmg_gotorc(y, x + len++);
+			SLsmg_write_char(key);
+		}
+		SLsmg_refresh();
+
+		/* XXX more graceful overflow handling needed */
+		if (len == sizeof(buf) - 1) {
+			ui_helpline__push("maximum size of symbol name reached!");
+			key = K_ENTER;
+			break;
+		}
+next_key:
+		key = ui__getch(delay_secs);
+	}
+
+	buf[len] = '\0';
+	strncpy(input, buf, len+1);
+	return key;
+}
+
+int ui__question_window(const char *title, const char *text,
+			const char *exit_msg, int delay_secs)
+{
+	int x, y;
+	int max_len = 0, nr_lines = 0;
+	const char *t;
+
+	t = text;
+	while (1) {
+		const char *sep = strchr(t, '\n');
+		int len;
+
+		if (sep == NULL)
+			sep = strchr(t, '\0');
+		len = sep - t;
+		if (max_len < len)
+			max_len = len;
+		++nr_lines;
+		if (*sep == '\0')
+			break;
+		t = sep + 1;
+	}
+
+	max_len += 2;
+	nr_lines += 4;
+	y = SLtt_Screen_Rows / 2 - nr_lines / 2,
+	x = SLtt_Screen_Cols / 2 - max_len / 2;
+
+	SLsmg_set_color(0);
+	SLsmg_draw_box(y, x++, nr_lines, max_len);
+	if (title) {
+		SLsmg_gotorc(y, x + 1);
+		SLsmg_write_string((char *)title);
+	}
+	SLsmg_gotorc(++y, x);
+	nr_lines -= 2;
+	max_len -= 2;
+	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
+				   nr_lines, max_len, 1);
+	SLsmg_gotorc(y + nr_lines - 2, x);
+	SLsmg_write_nstring((char *)" ", max_len);
+	SLsmg_gotorc(y + nr_lines - 1, x);
+	SLsmg_write_nstring((char *)exit_msg, max_len);
+	SLsmg_refresh();
+	return ui__getch(delay_secs);
+}
+
+int ui__help_window(const char *text)
+{
+	return ui__question_window("Help", text, "Press any key...", 0);
+}
+
+int ui__dialog_yesno(const char *msg)
+{
+	return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0);
+}
+
+static int __ui__warning(const char *title, const char *format, va_list args)
+{
+	char *s;
+
+	if (vasprintf(&s, format, args) > 0) {
+		int key;
+
+		pthread_mutex_lock(&ui__lock);
+		key = ui__question_window(title, s, "Press any key...", 0);
+		pthread_mutex_unlock(&ui__lock);
+		free(s);
+		return key;
+	}
+
+	fprintf(stderr, "%s\n", title);
+	vfprintf(stderr, format, args);
+	return K_ESC;
+}
+
+static int perf_tui__error(const char *format, va_list args)
+{
+	return __ui__warning("Error:", format, args);
+}
+
+static int perf_tui__warning(const char *format, va_list args)
+{
+	return __ui__warning("Warning:", format, args);
+}
+
+struct perf_error_ops perf_tui_eops = {
+	.error		= perf_tui__error,
+	.warning	= perf_tui__warning,
+};
diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c
index ad4374a16bb0..4f989774c8c6 100644
--- a/tools/perf/ui/util.c
+++ b/tools/perf/ui/util.c
@@ -1,250 +1,85 @@
-#include "../util.h"
-#include <signal.h>
-#include <stdbool.h>
-#include <string.h>
-#include <sys/ttydefaults.h>
-
-#include "../cache.h"
-#include "../debug.h"
-#include "browser.h"
-#include "keysyms.h"
-#include "helpline.h"
-#include "ui.h"
 #include "util.h"
-#include "libslang.h"
-
-static void ui_browser__argv_write(struct ui_browser *browser,
-				   void *entry, int row)
-{
-	char **arg = entry;
-	bool current_entry = ui_browser__is_current_entry(browser, row);
-
-	ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
-						       HE_COLORSET_NORMAL);
-	slsmg_write_nstring(*arg, browser->width);
-}
-
-static int popup_menu__run(struct ui_browser *menu)
-{
-	int key;
-
-	if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0)
-		return -1;
+#include "../debug.h"
 
-	while (1) {
-		key = ui_browser__run(menu, 0);
-
-		switch (key) {
-		case K_RIGHT:
-		case K_ENTER:
-			key = menu->index;
-			break;
-		case K_LEFT:
-		case K_ESC:
-		case 'q':
-		case CTRL('c'):
-			key = -1;
-			break;
-		default:
-			continue;
-		}
-
-		break;
-	}
-
-	ui_browser__hide(menu);
-	return key;
-}
 
-int ui__popup_menu(int argc, char * const argv[])
+/*
+ * Default error logging functions
+ */
+static int perf_stdio__error(const char *format, va_list args)
 {
-	struct ui_browser menu = {
-		.entries    = (void *)argv,
-		.refresh    = ui_browser__argv_refresh,
-		.seek	    = ui_browser__argv_seek,
-		.write	    = ui_browser__argv_write,
-		.nr_entries = argc,
-	};
-
-	return popup_menu__run(&menu);
+	fprintf(stderr, "Error:\n");
+	vfprintf(stderr, format, args);
+	return 0;
 }
 
-int ui_browser__input_window(const char *title, const char *text, char *input,
-			     const char *exit_msg, int delay_secs)
+static int perf_stdio__warning(const char *format, va_list args)
 {
-	int x, y, len, key;
-	int max_len = 60, nr_lines = 0;
-	static char buf[50];
-	const char *t;
-
-	t = text;
-	while (1) {
-		const char *sep = strchr(t, '\n');
-
-		if (sep == NULL)
-			sep = strchr(t, '\0');
-		len = sep - t;
-		if (max_len < len)
-			max_len = len;
-		++nr_lines;
-		if (*sep == '\0')
-			break;
-		t = sep + 1;
-	}
-
-	max_len += 2;
-	nr_lines += 8;
-	y = SLtt_Screen_Rows / 2 - nr_lines / 2;
-	x = SLtt_Screen_Cols / 2 - max_len / 2;
-
-	SLsmg_set_color(0);
-	SLsmg_draw_box(y, x++, nr_lines, max_len);
-	if (title) {
-		SLsmg_gotorc(y, x + 1);
-		SLsmg_write_string((char *)title);
-	}
-	SLsmg_gotorc(++y, x);
-	nr_lines -= 7;
-	max_len -= 2;
-	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
-				   nr_lines, max_len, 1);
-	y += nr_lines;
-	len = 5;
-	while (len--) {
-		SLsmg_gotorc(y + len - 1, x);
-		SLsmg_write_nstring((char *)" ", max_len);
-	}
-	SLsmg_draw_box(y++, x + 1, 3, max_len - 2);
-
-	SLsmg_gotorc(y + 3, x);
-	SLsmg_write_nstring((char *)exit_msg, max_len);
-	SLsmg_refresh();
-
-	x += 2;
-	len = 0;
-	key = ui__getch(delay_secs);
-	while (key != K_TIMER && key != K_ENTER && key != K_ESC) {
-		if (key == K_BKSPC) {
-			if (len == 0)
-				goto next_key;
-			SLsmg_gotorc(y, x + --len);
-			SLsmg_write_char(' ');
-		} else {
-			buf[len] = key;
-			SLsmg_gotorc(y, x + len++);
-			SLsmg_write_char(key);
-		}
-		SLsmg_refresh();
-
-		/* XXX more graceful overflow handling needed */
-		if (len == sizeof(buf) - 1) {
-			ui_helpline__push("maximum size of symbol name reached!");
-			key = K_ENTER;
-			break;
-		}
-next_key:
-		key = ui__getch(delay_secs);
-	}
-
-	buf[len] = '\0';
-	strncpy(input, buf, len+1);
-	return key;
+	fprintf(stderr, "Warning:\n");
+	vfprintf(stderr, format, args);
+	return 0;
 }
 
-int ui__question_window(const char *title, const char *text,
-			const char *exit_msg, int delay_secs)
+static struct perf_error_ops default_eops =
 {
-	int x, y;
-	int max_len = 0, nr_lines = 0;
-	const char *t;
-
-	t = text;
-	while (1) {
-		const char *sep = strchr(t, '\n');
-		int len;
-
-		if (sep == NULL)
-			sep = strchr(t, '\0');
-		len = sep - t;
-		if (max_len < len)
-			max_len = len;
-		++nr_lines;
-		if (*sep == '\0')
-			break;
-		t = sep + 1;
-	}
-
-	max_len += 2;
-	nr_lines += 4;
-	y = SLtt_Screen_Rows / 2 - nr_lines / 2,
-	x = SLtt_Screen_Cols / 2 - max_len / 2;
-
-	SLsmg_set_color(0);
-	SLsmg_draw_box(y, x++, nr_lines, max_len);
-	if (title) {
-		SLsmg_gotorc(y, x + 1);
-		SLsmg_write_string((char *)title);
-	}
-	SLsmg_gotorc(++y, x);
-	nr_lines -= 2;
-	max_len -= 2;
-	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
-				   nr_lines, max_len, 1);
-	SLsmg_gotorc(y + nr_lines - 2, x);
-	SLsmg_write_nstring((char *)" ", max_len);
-	SLsmg_gotorc(y + nr_lines - 1, x);
-	SLsmg_write_nstring((char *)exit_msg, max_len);
-	SLsmg_refresh();
-	return ui__getch(delay_secs);
-}
+	.error		= perf_stdio__error,
+	.warning	= perf_stdio__warning,
+};
 
-int ui__help_window(const char *text)
-{
-	return ui__question_window("Help", text, "Press any key...", 0);
-}
+static struct perf_error_ops *perf_eops = &default_eops;
 
-int ui__dialog_yesno(const char *msg)
-{
-	return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0);
-}
 
-int __ui__warning(const char *title, const char *format, va_list args)
+int ui__error(const char *format, ...)
 {
-	char *s;
-
-	if (use_browser > 0 && vasprintf(&s, format, args) > 0) {
-		int key;
+	int ret;
+	va_list args;
 
-		pthread_mutex_lock(&ui__lock);
-		key = ui__question_window(title, s, "Press any key...", 0);
-		pthread_mutex_unlock(&ui__lock);
-		free(s);
-		return key;
-	}
+	va_start(args, format);
+	ret = perf_eops->error(format, args);
+	va_end(args);
 
-	fprintf(stderr, "%s:\n", title);
-	vfprintf(stderr, format, args);
-	return K_ESC;
+	return ret;
 }
 
 int ui__warning(const char *format, ...)
 {
-	int key;
+	int ret;
 	va_list args;
 
 	va_start(args, format);
-	key = __ui__warning("Warning", format, args);
+	ret = perf_eops->warning(format, args);
 	va_end(args);
-	return key;
+
+	return ret;
 }
 
-int ui__error(const char *format, ...)
+
+/**
+ * perf_error__register - Register error logging functions
+ * @eops: The pointer to error logging function struct
+ *
+ * Register UI-specific error logging functions. Before calling this,
+ * other logging functions should be unregistered, if any.
+ */
+int perf_error__register(struct perf_error_ops *eops)
 {
-	int key;
-	va_list args;
+	if (perf_eops != &default_eops)
+		return -1;
 
-	va_start(args, format);
-	key = __ui__warning("Error", format, args);
-	va_end(args);
-	return key;
+	perf_eops = eops;
+	return 0;
+}
+
+/**
+ * perf_error__unregister - Unregister error logging functions
+ * @eops: The pointer to error logging function struct
+ *
+ * Unregister already registered error logging functions.
+ */
+int perf_error__unregister(struct perf_error_ops *eops)
+{
+	if (perf_eops != eops)
+		return -1;
+
+	perf_eops = &default_eops;
+	return 0;
 }
diff --git a/tools/perf/ui/util.h b/tools/perf/ui/util.h
index 2d1738bd71c8..361f08c52d37 100644
--- a/tools/perf/ui/util.h
+++ b/tools/perf/ui/util.h
@@ -9,6 +9,13 @@ int ui__help_window(const char *text);
 int ui__dialog_yesno(const char *msg);
 int ui__question_window(const char *title, const char *text,
 			const char *exit_msg, int delay_secs);
-int __ui__warning(const char *title, const char *format, va_list args);
+
+struct perf_error_ops {
+	int (*error)(const char *format, va_list args);
+	int (*warning)(const char *format, va_list args);
+};
+
+int perf_error__register(struct perf_error_ops *eops);
+int perf_error__unregister(struct perf_error_ops *eops);
 
 #endif /* _PERF_UI_UTIL_H_ */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index efb1fce259a4..4dfe0bb3c322 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -47,7 +47,7 @@ int dump_printf(const char *fmt, ...)
 	return ret;
 }
 
-#ifdef NO_NEWT_SUPPORT
+#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT)
 int ui__warning(const char *format, ...)
 {
 	va_list args;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 6bebe7f0a20c..015c91dbc096 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -12,8 +12,9 @@ int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
 void trace_event(union perf_event *event);
 
 struct ui_progress;
+struct perf_error_ops;
 
-#ifdef NO_NEWT_SUPPORT
+#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT)
 static inline int ui_helpline__show_help(const char *format __used, va_list ap __used)
 {
 	return 0;
@@ -23,12 +24,28 @@ static inline void ui_progress__update(u64 curr __used, u64 total __used,
 				       const char *title __used) {}
 
 #define ui__error(format, arg...) ui__warning(format, ##arg)
-#else
+
+static inline int
+perf_error__register(struct perf_error_ops *eops __used)
+{
+	return 0;
+}
+
+static inline int
+perf_error__unregister(struct perf_error_ops *eops __used)
+{
+	return 0;
+}
+
+#else /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */
+
 extern char ui_helpline__last_msg[];
 int ui_helpline__show_help(const char *format, va_list ap);
 #include "../ui/progress.h"
 int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2)));
-#endif
+#include "../ui/util.h"
+
+#endif /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */
 
 int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
 int ui__error_paranoid(void);
-- 
cgit v1.2.3


From 42ab68a35ffee04700648ec42c9507145a66837d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Tue, 29 May 2012 13:22:59 +0900
Subject: perf ui/gtk: Introduce struct perf_gtk_context

The struct perf_gtk_context is for tracking current state of GTK window
and/or other things. This is a preparation of next changes.

Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338265382-6872-5-git-send-email-namhyung@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/gtk/browser.c |  8 +++++++-
 tools/perf/ui/gtk/gtk.h     | 17 +++++++++++++++++
 tools/perf/ui/gtk/util.c    | 23 +++++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
index 0656c381a89c..33ab1aee3472 100644
--- a/tools/perf/ui/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -11,8 +11,8 @@
 
 static void perf_gtk__signal(int sig)
 {
+	perf_gtk__exit(false);
 	psignal(sig, "perf");
-	gtk_main_quit();
 }
 
 static void perf_gtk__resize_window(GtkWidget *window)
@@ -143,6 +143,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 
 	g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
 
+	pgctx = perf_gtk__activate_context(window);
+	if (!pgctx)
+		return -1;
+
 	notebook = gtk_notebook_new();
 
 	list_for_each_entry(pos, &evlist->entries, node) {
@@ -174,5 +178,7 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 
 	gtk_main();
 
+	perf_gtk__deactivate_context(&pgctx);
+
 	return 0;
 }
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index 75177ee04032..34fbca6d48a2 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -1,8 +1,25 @@
 #ifndef _PERF_GTK_H_
 #define _PERF_GTK_H_ 1
 
+#include <stdbool.h>
+
 #pragma GCC diagnostic ignored "-Wstrict-prototypes"
 #include <gtk/gtk.h>
 #pragma GCC diagnostic error "-Wstrict-prototypes"
 
+
+struct perf_gtk_context {
+	GtkWidget *main_window;
+};
+
+extern struct perf_gtk_context *pgctx;
+
+static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx)
+{
+	return ctx && ctx->main_window;
+}
+
+struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window);
+int perf_gtk__deactivate_context(struct perf_gtk_context **ctx);
+
 #endif /* _PERF_GTK_H_ */
diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c
index a727fe394e91..6fe13fdc513e 100644
--- a/tools/perf/ui/gtk/util.c
+++ b/tools/perf/ui/gtk/util.c
@@ -3,6 +3,29 @@
 #include "gtk.h"
 
 
+struct perf_gtk_context *pgctx;
+
+struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window)
+{
+	struct perf_gtk_context *ctx;
+
+	ctx = malloc(sizeof(*pgctx));
+	if (ctx)
+		ctx->main_window = window;
+
+	return ctx;
+}
+
+int perf_gtk__deactivate_context(struct perf_gtk_context **ctx)
+{
+	if (!perf_gtk__is_active_context(*ctx))
+		return -1;
+
+	free(*ctx);
+	*ctx = NULL;
+	return 0;
+}
+
 /*
  * FIXME: Functions below should be implemented properly.
  *        For now, just add stubs for NO_NEWT=1 build.
-- 
cgit v1.2.3


From b4418c6848dcd56cc90625dcfaef7cb019492233 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Tue, 29 May 2012 13:23:00 +0900
Subject: perf ui/gtk: Add GTK statusbar widget to browser window

Add statusbar widget to display non-critical messages at the bottom of
the window. This can be used for showing a status change, warning or
help message.

Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338265382-6872-6-git-send-email-namhyung@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/gtk/browser.c | 26 +++++++++++++++++++++++++-
 tools/perf/ui/gtk/gtk.h     |  2 ++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
index 33ab1aee3472..ece360db8658 100644
--- a/tools/perf/ui/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -122,13 +122,30 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
 	gtk_container_add(GTK_CONTAINER(window), view);
 }
 
+static GtkWidget *perf_gtk__setup_statusbar(void)
+{
+	GtkWidget *stbar;
+	unsigned ctxid;
+
+	stbar = gtk_statusbar_new();
+
+	ctxid = gtk_statusbar_get_context_id(GTK_STATUSBAR(stbar),
+					     "perf report");
+	pgctx->statbar = stbar;
+	pgctx->statbar_ctx_id = ctxid;
+
+	return stbar;
+}
+
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 				  const char *help __used,
 				  void (*timer) (void *arg)__used,
 				  void *arg __used, int delay_secs __used)
 {
 	struct perf_evsel *pos;
+	GtkWidget *vbox;
 	GtkWidget *notebook;
+	GtkWidget *statbar;
 	GtkWidget *window;
 
 	signal(SIGSEGV, perf_gtk__signal);
@@ -147,6 +164,8 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 	if (!pgctx)
 		return -1;
 
+	vbox = gtk_vbox_new(FALSE, 0);
+
 	notebook = gtk_notebook_new();
 
 	list_for_each_entry(pos, &evlist->entries, node) {
@@ -168,7 +187,12 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 		gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);
 	}
 
-	gtk_container_add(GTK_CONTAINER(window), notebook);
+	gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
+
+	statbar = perf_gtk__setup_statusbar();
+	gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
+
+	gtk_container_add(GTK_CONTAINER(window), vbox);
 
 	gtk_widget_show_all(window);
 
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index 34fbca6d48a2..206167868c5f 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -10,6 +10,8 @@
 
 struct perf_gtk_context {
 	GtkWidget *main_window;
+	GtkWidget *statbar;
+	guint statbar_ctx_id;
 };
 
 extern struct perf_gtk_context *pgctx;
-- 
cgit v1.2.3


From a6b702c117f839023814c1e03453c701d26de522 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Tue, 29 May 2012 13:23:01 +0900
Subject: perf ui/gtk: Add GTK info_bar widget to browser window

The GtkInfoBar is a modern UI component to display messages without
bothering the main window. It'll be used for showing a warning message.

As the GtkInfoBar requires 2.18 (or newer) version of GTK+ library, add
availability check to Makefile too.

Suggested-by: Sunjin Yang <fan4326@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338265382-6872-7-git-send-email-namhyung@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile                 |  3 +++
 tools/perf/config/feature-tests.mak | 13 +++++++++++++
 tools/perf/ui/gtk/browser.c         | 33 +++++++++++++++++++++++++++++++++
 tools/perf/ui/gtk/gtk.h             | 12 ++++++++++++
 4 files changed, 61 insertions(+)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index c0ee917ae8ab..d698c118a602 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -523,6 +523,9 @@ else
 		msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
 		BASIC_CFLAGS += -DNO_GTK2_SUPPORT
 	else
+		ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2)),y)
+			BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR
+		endif
 		BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0)
 		EXTLIBS += $(shell pkg-config --libs gtk+-2.0)
 		LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
index d9084e03ce56..6c18785a6417 100644
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -78,6 +78,19 @@ int main(int argc, char *argv[])
         return 0;
 }
 endef
+
+define SOURCE_GTK2_INFOBAR
+#pragma GCC diagnostic ignored \"-Wstrict-prototypes\"
+#include <gtk/gtk.h>
+#pragma GCC diagnostic error \"-Wstrict-prototypes\"
+
+int main(void)
+{
+	gtk_info_bar_new();
+
+	return 0;
+}
+endef
 endif
 
 ifndef NO_LIBPERL
diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
index ece360db8658..fd41e8d10337 100644
--- a/tools/perf/ui/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -122,6 +122,34 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
 	gtk_container_add(GTK_CONTAINER(window), view);
 }
 
+#ifdef HAVE_GTK_INFO_BAR
+static GtkWidget *perf_gtk__setup_info_bar(void)
+{
+	GtkWidget *info_bar;
+	GtkWidget *label;
+	GtkWidget *content_area;
+
+	info_bar = gtk_info_bar_new();
+	gtk_widget_set_no_show_all(info_bar, TRUE);
+
+	label = gtk_label_new("");
+	gtk_widget_show(label);
+
+	content_area = gtk_info_bar_get_content_area(GTK_INFO_BAR(info_bar));
+	gtk_container_add(GTK_CONTAINER(content_area), label);
+
+	gtk_info_bar_add_button(GTK_INFO_BAR(info_bar), GTK_STOCK_OK,
+				GTK_RESPONSE_OK);
+	g_signal_connect(info_bar, "response",
+			 G_CALLBACK(gtk_widget_hide), NULL);
+
+	pgctx->info_bar = info_bar;
+	pgctx->message_label = label;
+
+	return info_bar;
+}
+#endif
+
 static GtkWidget *perf_gtk__setup_statusbar(void)
 {
 	GtkWidget *stbar;
@@ -145,6 +173,7 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 	struct perf_evsel *pos;
 	GtkWidget *vbox;
 	GtkWidget *notebook;
+	GtkWidget *info_bar;
 	GtkWidget *statbar;
 	GtkWidget *window;
 
@@ -189,6 +218,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 
 	gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
 
+	info_bar = perf_gtk__setup_info_bar();
+	if (info_bar)
+		gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0);
+
 	statbar = perf_gtk__setup_statusbar();
 	gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
 
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index 206167868c5f..a4d0f2b4a2dc 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -10,6 +10,11 @@
 
 struct perf_gtk_context {
 	GtkWidget *main_window;
+
+#ifdef HAVE_GTK_INFO_BAR
+	GtkWidget *info_bar;
+	GtkWidget *message_label;
+#endif
 	GtkWidget *statbar;
 	guint statbar_ctx_id;
 };
@@ -24,4 +29,11 @@ static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx)
 struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window);
 int perf_gtk__deactivate_context(struct perf_gtk_context **ctx);
 
+#ifndef HAVE_GTK_INFO_BAR
+static inline GtkWidget *perf_gtk__setup_info_bar(void)
+{
+	return NULL;
+}
+#endif
+
 #endif /* _PERF_GTK_H_ */
-- 
cgit v1.2.3


From e078ba14dff5c315ce19a978574883f417d2cfa0 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Tue, 29 May 2012 13:23:02 +0900
Subject: perf ui/gtk: Use struct perf_error_ops

Define and use perf_gtk_eops to provide a GTK2 message dialog for error
reporting and a info_bar for warning.

As GtkInfoBar requires recent GTK+ libraries, provides a fallback
implementation using statusbar widget too.

Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1338265382-6872-8-git-send-email-namhyung@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/gtk/setup.c |  5 +++
 tools/perf/ui/gtk/util.c  | 86 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)

diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c
index 829529957766..92879ce61e2f 100644
--- a/tools/perf/ui/gtk/setup.c
+++ b/tools/perf/ui/gtk/setup.c
@@ -1,12 +1,17 @@
 #include "gtk.h"
 #include "../../util/cache.h"
+#include "../../util/debug.h"
+
+extern struct perf_error_ops perf_gtk_eops;
 
 int perf_gtk__init(void)
 {
+	perf_error__register(&perf_gtk_eops);
 	return gtk_init_check(NULL, NULL) ? 0 : -1;
 }
 
 void perf_gtk__exit(bool wait_for_ok __used)
 {
+	perf_error__unregister(&perf_gtk_eops);
 	gtk_main_quit();
 }
diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c
index 6fe13fdc513e..0ead373c0dfb 100644
--- a/tools/perf/ui/gtk/util.c
+++ b/tools/perf/ui/gtk/util.c
@@ -2,6 +2,8 @@
 #include "../../util/debug.h"
 #include "gtk.h"
 
+#include <string.h>
+
 
 struct perf_gtk_context *pgctx;
 
@@ -26,6 +28,90 @@ int perf_gtk__deactivate_context(struct perf_gtk_context **ctx)
 	return 0;
 }
 
+static int perf_gtk__error(const char *format, va_list args)
+{
+	char *msg;
+	GtkWidget *dialog;
+
+	if (!perf_gtk__is_active_context(pgctx) ||
+	    vasprintf(&msg, format, args) < 0) {
+		fprintf(stderr, "Error:\n");
+		vfprintf(stderr, format, args);
+		fprintf(stderr, "\n");
+		return -1;
+	}
+
+	dialog = gtk_message_dialog_new_with_markup(GTK_WINDOW(pgctx->main_window),
+					GTK_DIALOG_DESTROY_WITH_PARENT,
+					GTK_MESSAGE_ERROR,
+					GTK_BUTTONS_CLOSE,
+					"<b>Error</b>\n\n%s", msg);
+	gtk_dialog_run(GTK_DIALOG(dialog));
+
+	gtk_widget_destroy(dialog);
+	free(msg);
+	return 0;
+}
+
+#ifdef HAVE_GTK_INFO_BAR
+static int perf_gtk__warning_info_bar(const char *format, va_list args)
+{
+	char *msg;
+
+	if (!perf_gtk__is_active_context(pgctx) ||
+	    vasprintf(&msg, format, args) < 0) {
+		fprintf(stderr, "Warning:\n");
+		vfprintf(stderr, format, args);
+		fprintf(stderr, "\n");
+		return -1;
+	}
+
+	gtk_label_set_text(GTK_LABEL(pgctx->message_label), msg);
+	gtk_info_bar_set_message_type(GTK_INFO_BAR(pgctx->info_bar),
+				      GTK_MESSAGE_WARNING);
+	gtk_widget_show(pgctx->info_bar);
+
+	free(msg);
+	return 0;
+}
+#else
+static int perf_gtk__warning_statusbar(const char *format, va_list args)
+{
+	char *msg, *p;
+
+	if (!perf_gtk__is_active_context(pgctx) ||
+	    vasprintf(&msg, format, args) < 0) {
+		fprintf(stderr, "Warning:\n");
+		vfprintf(stderr, format, args);
+		fprintf(stderr, "\n");
+		return -1;
+	}
+
+	gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar),
+			  pgctx->statbar_ctx_id);
+
+	/* Only first line can be displayed */
+	p = strchr(msg, '\n');
+	if (p)
+		*p = '\0';
+
+	gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar),
+			   pgctx->statbar_ctx_id, msg);
+
+	free(msg);
+	return 0;
+}
+#endif
+
+struct perf_error_ops perf_gtk_eops = {
+	.error		= perf_gtk__error,
+#ifdef HAVE_GTK_INFO_BAR
+	.warning	= perf_gtk__warning_info_bar,
+#else
+	.warning	= perf_gtk__warning_statusbar,
+#endif
+};
+
 /*
  * FIXME: Functions below should be implemented properly.
  *        For now, just add stubs for NO_NEWT=1 build.
-- 
cgit v1.2.3


From cb1a28a0cbf9dac5a7a0ca02ebebc12db260d2f8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 Jun 2012 18:23:31 -0300
Subject: perf lib: Introduce rtrim

Remove the trailing whitespaces.

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-8bxozh5lyixgjmziqaxo9675@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/string.c | 22 ++++++++++++++++++++++
 tools/perf/util/util.h   |  2 ++
 2 files changed, 24 insertions(+)

diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index d5836382ff2c..199bc4d8905d 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -313,3 +313,25 @@ int strtailcmp(const char *s1, const char *s2)
 	return 0;
 }
 
+/**
+ * rtrim - Removes trailing whitespace from @s.
+ * @s: The string to be stripped.
+ *
+ * Note that the first trailing whitespace is replaced with a %NUL-terminator
+ * in the given string @s. Returns @s.
+ */
+char *rtrim(char *s)
+{
+	size_t size = strlen(s);
+	char *end;
+
+	if (!size)
+		return s;
+
+	end = s + size - 1;
+	while (end >= s && isspace(*end))
+		end--;
+	*(end + 1) = '\0';
+
+	return s;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 2daaedb83d84..b13c7331eaf8 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -264,4 +264,6 @@ bool is_power_of_2(unsigned long n)
 
 size_t hex_width(u64 v);
 
+char *rtrim(char *s);
+
 #endif
-- 
cgit v1.2.3


From aff3f3f68ae6002a30543726b2ae48cafce109e6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 Jun 2012 19:31:28 -0300
Subject: perf hists browser: Implement printing snapshots to files

To avoid having to resort to --stdio, that expands everything, instead
allow the user to go on expanding the relevant callchains and then press
'P' to print that view.

As the hists browser is used for both static (report) and dynamic (top)
views, it prints to a 'perf.hists.N' sequence, i.e. multiple snapshots
can be taken in report and top.

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-wr9xx4ba0utrynu5j6wotd79@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 195 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 53f6697d014e..f556e5f6388b 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -23,6 +23,7 @@ struct hist_browser {
 	struct hists	    *hists;
 	struct hist_entry   *he_selection;
 	struct map_symbol   *selection;
+	int		     print_seq;
 	bool		     has_symbols;
 };
 
@@ -800,6 +801,196 @@ do_offset:
 	}
 }
 
+static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *browser,
+							struct callchain_node *chain_node,
+							u64 total, int level,
+							FILE *fp)
+{
+	struct rb_node *node;
+	int offset = level * LEVEL_OFFSET_STEP;
+	u64 new_total, remaining;
+	int printed = 0;
+
+	if (callchain_param.mode == CHAIN_GRAPH_REL)
+		new_total = chain_node->children_hit;
+	else
+		new_total = total;
+
+	remaining = new_total;
+	node = rb_first(&chain_node->rb_root);
+	while (node) {
+		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+		struct rb_node *next = rb_next(node);
+		u64 cumul = callchain_cumul_hits(child);
+		struct callchain_list *chain;
+		char folded_sign = ' ';
+		int first = true;
+		int extra_offset = 0;
+
+		remaining -= cumul;
+
+		list_for_each_entry(chain, &child->val, list) {
+			char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str;
+			const char *str;
+			bool was_first = first;
+
+			if (first)
+				first = false;
+			else
+				extra_offset = LEVEL_OFFSET_STEP;
+
+			folded_sign = callchain_list__folded(chain);
+
+			alloc_str = NULL;
+			str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
+			if (was_first) {
+				double percent = cumul * 100.0 / new_total;
+
+				if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0)
+					str = "Not enough memory!";
+				else
+					str = alloc_str;
+			}
+
+			printed += fprintf(fp, "%*s%c %s\n", offset + extra_offset, " ", folded_sign, str);
+			free(alloc_str);
+			if (folded_sign == '+')
+				break;
+		}
+
+		if (folded_sign == '-') {
+			const int new_level = level + (extra_offset ? 2 : 1);
+			printed += hist_browser__fprintf_callchain_node_rb_tree(browser, child, new_total,
+										new_level, fp);
+		}
+
+		node = next;
+	}
+
+	return printed;
+}
+
+static int hist_browser__fprintf_callchain_node(struct hist_browser *browser,
+						struct callchain_node *node,
+						int level, FILE *fp)
+{
+	struct callchain_list *chain;
+	int offset = level * LEVEL_OFFSET_STEP;
+	char folded_sign = ' ';
+	int printed = 0;
+
+	list_for_each_entry(chain, &node->val, list) {
+		char ipstr[BITS_PER_LONG / 4 + 1], *s;
+
+		folded_sign = callchain_list__folded(chain);
+		s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
+		printed += fprintf(fp, "%*s%c %s\n", offset, " ", folded_sign, s);
+	}
+
+	if (folded_sign == '-')
+		printed += hist_browser__fprintf_callchain_node_rb_tree(browser, node,
+									browser->hists->stats.total_period,
+									level + 1,  fp);
+	return printed;
+}
+
+static int hist_browser__fprintf_callchain(struct hist_browser *browser,
+					   struct rb_root *chain, int level, FILE *fp)
+{
+	struct rb_node *nd;
+	int printed = 0;
+
+	for (nd = rb_first(chain); nd; nd = rb_next(nd)) {
+		struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
+
+		printed += hist_browser__fprintf_callchain_node(browser, node, level, fp);
+	}
+
+	return printed;
+}
+
+static int hist_browser__fprintf_entry(struct hist_browser *browser,
+				       struct hist_entry *he, FILE *fp)
+{
+	char s[8192];
+	double percent;
+	int printed = 0;
+	char folded_sign = ' ';
+
+	if (symbol_conf.use_callchain)
+		folded_sign = hist_entry__folded(he);
+
+	hist_entry__snprintf(he, s, sizeof(s), browser->hists);
+	percent = (he->period * 100.0) / browser->hists->stats.total_period;
+
+	if (symbol_conf.use_callchain)
+		printed += fprintf(fp, "%c ", folded_sign);
+
+	printed += fprintf(fp, " %5.2f%%", percent);
+
+	if (symbol_conf.show_nr_samples)
+		printed += fprintf(fp, " %11u", he->nr_events);
+
+	if (symbol_conf.show_total_period)
+		printed += fprintf(fp, " %12" PRIu64, he->period);
+
+	printed += fprintf(fp, "%s\n", rtrim(s));
+
+	if (folded_sign == '-')
+		printed += hist_browser__fprintf_callchain(browser, &he->sorted_chain, 1, fp);
+
+	return printed;
+}
+
+static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
+{
+	struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries));
+	int printed = 0;
+
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		printed += hist_browser__fprintf_entry(browser, h, fp);
+		nd = hists__filter_entries(rb_next(nd));
+	}
+
+	return printed;
+}
+
+static int hist_browser__dump(struct hist_browser *browser)
+{
+	char filename[64];
+	FILE *fp;
+
+	while (1) {
+		scnprintf(filename, sizeof(filename), "perf.hist.%d", browser->print_seq);
+		if (access(filename, F_OK))
+			break;
+		/*
+ 		 * XXX: Just an arbitrary lazy upper limit
+ 		 */
+		if (++browser->print_seq == 8192) {
+			ui_helpline__fpush("Too many perf.hist.N files, nothing written!");
+			return -1;
+		}
+	}
+
+	fp = fopen(filename, "w");
+	if (fp == NULL) {
+		char bf[64];
+		strerror_r(errno, bf, sizeof(bf));
+		ui_helpline__fpush("Couldn't write to %s: %s", filename, bf);
+		return -1;
+	}
+
+	++browser->print_seq;
+	hist_browser__fprintf(browser, fp);
+	fclose(fp);
+	ui_helpline__fpush("%s written!", filename);
+
+	return 0;
+}
+
 static struct hist_browser *hist_browser__new(struct hists *hists)
 {
 	struct hist_browser *browser = zalloc(sizeof(*browser));
@@ -937,6 +1128,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			    browser->selection->map->dso->annotate_warned)
 				continue;
 			goto do_annotate;
+		case 'P':
+			hist_browser__dump(browser);
+			continue;
 		case 'd':
 			goto zoom_dso;
 		case 't':
@@ -969,6 +1163,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 					"E             Expand all callchains\n"
 					"d             Zoom into current DSO\n"
 					"t             Zoom into current Thread\n"
+					"P             Print histograms to perf.hist.N\n"
 					"/             Filter symbol by name");
 			continue;
 		case K_ENTER:
-- 
cgit v1.2.3


From 27f18617b01dbbc928e9bd3731d1766222fb7e0d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 11 Jun 2012 13:33:09 -0300
Subject: perf evsel: Carve out event modifier formatting

From perf_evsel__hw_name, so that we can use it for the other kinds of
events (tracepoints, software, hw cache, etc).

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-9gmd5wewsrvtny8tzxjfp471@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9f6cebd798ee..dab893804a14 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -86,16 +86,15 @@ const char *__perf_evsel__hw_name(u64 config)
 	return "unknown-hardware";
 }
 
-static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
+static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size)
 {
-	int colon = 0;
+	int colon = 0, r = 0;
 	struct perf_event_attr *attr = &evsel->attr;
-	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(attr->config));
 	bool exclude_guest_default = false;
 
 #define MOD_PRINT(context, mod)	do {					\
 		if (!attr->exclude_##context) {				\
-			if (!colon) colon = r++;			\
+			if (!colon) colon = ++r;			\
 			r += scnprintf(bf + r, size - r, "%c", mod);	\
 		} } while(0)
 
@@ -108,7 +107,7 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
 
 	if (attr->precise_ip) {
 		if (!colon)
-			colon = r++;
+			colon = ++r;
 		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
 		exclude_guest_default = true;
 	}
@@ -119,10 +118,16 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
 	}
 #undef MOD_PRINT
 	if (colon)
-		bf[colon] = ':';
+		bf[colon - 1] = ':';
 	return r;
 }
 
+static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config));
+	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
 int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size)
 {
 	int ret;
-- 
cgit v1.2.3


From 0b668bc9a74ce1bd3b8c5fd93e8d85ed955e11fe Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 11 Jun 2012 14:08:07 -0300
Subject: perf tools: Reconstruct hw cache event with modifiers from
 perf_event_attr

  [root@sandy ~]# perf record -a -e dTLB-load-misses:u usleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.486 MB perf.data (~21216 samples) ]

Before:

  [root@sandy ~]# perf evlist
  dTLB-load-misses
  [root@sandy ~]#

After:

  [root@sandy ~]# perf evlist
  dTLB-load-misses:u
  [root@sandy ~]#

Ditto for other tools.

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-7x1b0e6jthkr93lfjzsuakk5@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c        | 104 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/evsel.h        |  16 +++++-
 tools/perf/util/parse-events.c | 115 ++++++-----------------------------------
 3 files changed, 134 insertions(+), 101 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dab893804a14..47f1fe2feab8 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -128,6 +128,105 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
 	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
+const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_EVSEL__MAX_ALIASES] = {
+ { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
+ { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
+ { "LLC",	"L2",							},
+ { "dTLB",	"d-tlb",	"Data-TLB",				},
+ { "iTLB",	"i-tlb",	"Instruction-TLB",			},
+ { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
+ { "node",								},
+};
+
+const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
+				   [PERF_EVSEL__MAX_ALIASES] = {
+ { "load",	"loads",	"read",					},
+ { "store",	"stores",	"write",				},
+ { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
+};
+
+const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
+				       [PERF_EVSEL__MAX_ALIASES] = {
+ { "refs",	"Reference",	"ops",		"access",		},
+ { "misses",	"miss",							},
+};
+
+#define C(x)		PERF_COUNT_HW_CACHE_##x
+#define CACHE_READ	(1 << C(OP_READ))
+#define CACHE_WRITE	(1 << C(OP_WRITE))
+#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
+#define COP(x)		(1 << x)
+
+/*
+ * cache operartion stat
+ * L1I : Read and prefetch only
+ * ITLB and BPU : Read-only
+ */
+static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
+ [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
+ [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(ITLB)]	= (CACHE_READ),
+ [C(BPU)]	= (CACHE_READ),
+ [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+};
+
+bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
+{
+	if (perf_evsel__hw_cache_stat[type] & COP(op))
+		return true;	/* valid */
+	else
+		return false;	/* invalid */
+}
+
+int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
+					    char *bf, size_t size)
+{
+	if (result) {
+		return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
+				 perf_evsel__hw_cache_op[op][0],
+				 perf_evsel__hw_cache_result[result][0]);
+	}
+
+	return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
+			 perf_evsel__hw_cache_op[op][1]);
+}
+
+int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
+{
+	u8 op, result, type = (config >>  0) & 0xff;
+	const char *err = "unknown-ext-hardware-cache-type";
+
+	if (type > PERF_COUNT_HW_CACHE_MAX)
+		goto out_err;
+
+	op = (config >>  8) & 0xff;
+	err = "unknown-ext-hardware-cache-op";
+	if (op > PERF_COUNT_HW_CACHE_OP_MAX)
+		goto out_err;
+
+	result = (config >> 16) & 0xff;
+	err = "unknown-ext-hardware-cache-result";
+	if (result > PERF_COUNT_HW_CACHE_RESULT_MAX)
+		goto out_err;
+
+	err = "invalid-cache";
+	if (!perf_evsel__is_cache_op_valid(type, op))
+		goto out_err;
+
+	return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
+out_err:
+	return scnprintf(bf, size, "%s", err);
+}
+
+static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size);
+	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
 int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size)
 {
 	int ret;
@@ -140,6 +239,11 @@ int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size)
 	case PERF_TYPE_HARDWARE:
 		ret = perf_evsel__hw_name(evsel, bf, size);
 		break;
+
+	case PERF_TYPE_HW_CACHE:
+		ret = perf_evsel__hw_cache_name(evsel, bf, size);
+		break;
+
 	default:
 		/*
 		 * FIXME
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4ba8b564e6f4..5bf946a05a6b 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -83,7 +83,21 @@ void perf_evsel__config(struct perf_evsel *evsel,
 			struct perf_record_opts *opts,
 			struct perf_evsel *first);
 
-const char* __perf_evsel__hw_name(u64 config);
+bool perf_evsel__is_cache_op_valid(u8 type, u8 op);
+
+#define PERF_EVSEL__MAX_ALIASES 8
+
+extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
+				       [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_EVSEL__MAX_ALIASES];
+const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
+				       [PERF_EVSEL__MAX_ALIASES];
+int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
+					    char *bf, size_t size);
+int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size);
+
+const char *__perf_evsel__hw_name(u64 config);
 int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size);
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 05dbc8b3c767..c8f8cf4a6920 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -74,51 +74,6 @@ static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
 	"emulation-faults",
 };
 
-#define MAX_ALIASES 8
-
-static const char *hw_cache[PERF_COUNT_HW_CACHE_MAX][MAX_ALIASES] = {
- { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
- { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
- { "LLC",	"L2",							},
- { "dTLB",	"d-tlb",	"Data-TLB",				},
- { "iTLB",	"i-tlb",	"Instruction-TLB",			},
- { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
- { "node",								},
-};
-
-static const char *hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][MAX_ALIASES] = {
- { "load",	"loads",	"read",					},
- { "store",	"stores",	"write",				},
- { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
-};
-
-static const char *hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
-				  [MAX_ALIASES] = {
- { "refs",	"Reference",	"ops",		"access",		},
- { "misses",	"miss",							},
-};
-
-#define C(x)		PERF_COUNT_HW_CACHE_##x
-#define CACHE_READ	(1 << C(OP_READ))
-#define CACHE_WRITE	(1 << C(OP_WRITE))
-#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
-#define COP(x)		(1 << x)
-
-/*
- * cache operartion stat
- * L1I : Read and prefetch only
- * ITLB and BPU : Read-only
- */
-static unsigned long hw_cache_stat[C(MAX)] = {
- [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
- [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
- [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
- [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
- [C(ITLB)]	= (CACHE_READ),
- [C(BPU)]	= (CACHE_READ),
- [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
-};
-
 #define for_each_subsystem(sys_dir, sys_dirent, sys_next)	       \
 	while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next)	       \
 	if (sys_dirent.d_type == DT_DIR &&				       \
@@ -236,30 +191,6 @@ static const char *tracepoint_id_to_name(u64 config)
 	return buf;
 }
 
-static int is_cache_op_valid(u8 cache_type, u8 cache_op)
-{
-	if (hw_cache_stat[cache_type] & COP(cache_op))
-		return 1;	/* valid */
-	else
-		return 0;	/* invalid */
-}
-
-static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result)
-{
-	static char name[50];
-
-	if (cache_result) {
-		sprintf(name, "%s-%s-%s", hw_cache[cache_type][0],
-			hw_cache_op[cache_op][0],
-			hw_cache_result[cache_result][0]);
-	} else {
-		sprintf(name, "%s-%s", hw_cache[cache_type][0],
-			hw_cache_op[cache_op][1]);
-	}
-
-	return name;
-}
-
 const char *event_type(int type)
 {
 	switch (type) {
@@ -287,7 +218,7 @@ const char *event_name(struct perf_evsel *evsel)
 	u64 config = evsel->attr.config;
 	int type = evsel->attr.type;
 
-	if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE) {
+	if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) {
 		/*
  		 * XXX minimal fix, see comment on perf_evsen__name, this static buffer
  		 * will go away together with event_name in the next devel cycle.
@@ -316,26 +247,9 @@ const char *__event_name(int type, u64 config)
 	case PERF_TYPE_HARDWARE:
 		return __perf_evsel__hw_name(config);
 
-	case PERF_TYPE_HW_CACHE: {
-		u8 cache_type, cache_op, cache_result;
-
-		cache_type   = (config >>  0) & 0xff;
-		if (cache_type > PERF_COUNT_HW_CACHE_MAX)
-			return "unknown-ext-hardware-cache-type";
-
-		cache_op     = (config >>  8) & 0xff;
-		if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX)
-			return "unknown-ext-hardware-cache-op";
-
-		cache_result = (config >> 16) & 0xff;
-		if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX)
-			return "unknown-ext-hardware-cache-result";
-
-		if (!is_cache_op_valid(cache_type, cache_op))
-			return "invalid-cache";
-
-		return event_cache_name(cache_type, cache_op, cache_result);
-	}
+	case PERF_TYPE_HW_CACHE:
+		__perf_evsel__hw_cache_name(config, buf, sizeof(buf));
+		return buf;
 
 	case PERF_TYPE_SOFTWARE:
 		if (config < PERF_COUNT_SW_MAX && sw_event_names[config])
@@ -379,13 +293,13 @@ static int add_event(struct list_head **_list, int *idx,
 	return 0;
 }
 
-static int parse_aliases(char *str, const char *names[][MAX_ALIASES], int size)
+static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
 {
 	int i, j;
 	int n, longest = -1;
 
 	for (i = 0; i < size; i++) {
-		for (j = 0; j < MAX_ALIASES && names[i][j]; j++) {
+		for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) {
 			n = strlen(names[i][j]);
 			if (n > longest && !strncasecmp(str, names[i][j], n))
 				longest = n;
@@ -410,7 +324,7 @@ int parse_events_add_cache(struct list_head **list, int *idx,
 	 * No fallback - if we cannot get a clear cache type
 	 * then bail out:
 	 */
-	cache_type = parse_aliases(type, hw_cache,
+	cache_type = parse_aliases(type, perf_evsel__hw_cache,
 				   PERF_COUNT_HW_CACHE_MAX);
 	if (cache_type == -1)
 		return -EINVAL;
@@ -423,18 +337,18 @@ int parse_events_add_cache(struct list_head **list, int *idx,
 		snprintf(name + n, MAX_NAME_LEN - n, "-%s\n", str);
 
 		if (cache_op == -1) {
-			cache_op = parse_aliases(str, hw_cache_op,
+			cache_op = parse_aliases(str, perf_evsel__hw_cache_op,
 						 PERF_COUNT_HW_CACHE_OP_MAX);
 			if (cache_op >= 0) {
-				if (!is_cache_op_valid(cache_type, cache_op))
+				if (!perf_evsel__is_cache_op_valid(cache_type, cache_op))
 					return -EINVAL;
 				continue;
 			}
 		}
 
 		if (cache_result == -1) {
-			cache_result = parse_aliases(str, hw_cache_result,
-						PERF_COUNT_HW_CACHE_RESULT_MAX);
+			cache_result = parse_aliases(str, perf_evsel__hw_cache_result,
+						     PERF_COUNT_HW_CACHE_RESULT_MAX);
 			if (cache_result >= 0)
 				continue;
 		}
@@ -970,16 +884,17 @@ void print_events_type(u8 type)
 int print_hwcache_events(const char *event_glob)
 {
 	unsigned int type, op, i, printed = 0;
+	char name[64];
 
 	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
 		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
 			/* skip invalid cache type */
-			if (!is_cache_op_valid(type, op))
+			if (!perf_evsel__is_cache_op_valid(type, op))
 				continue;
 
 			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
-				char *name = event_cache_name(type, op, i);
-
+				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
+									name, sizeof(name));
 				if (event_glob != NULL && !strglobmatch(name, event_glob))
 					continue;
 
-- 
cgit v1.2.3


From 335c2f5d25319b208fb8b444e6f3099a806a33bf Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 11 Jun 2012 14:36:20 -0300
Subject: perf tools: Reconstruct sw event with modifiers from perf_event_attr

  [root@sandy ~]# perf record -e task-clock:u -a usleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.482 MB perf.data (~21073 samples) ]
  [root@sandy ~]#

Before:

  [root@sandy ~]# perf evlist
  task-clock
  [root@sandy ~]#

After:

  [root@sandy ~]# perf evlist
  task-clock:u
  [root@sandy ~]#

Ditto for other tools.

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-97ltkmj7v23kyhflltf6iz5n@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c        | 29 +++++++++++++++++++++++++++++
 tools/perf/util/evsel.h        |  2 ++
 tools/perf/util/parse-events.c | 19 +++----------------
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 47f1fe2feab8..2da047331173 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -128,6 +128,31 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
 	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
+static const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
+	"cpu-clock",
+	"task-clock",
+	"page-faults",
+	"context-switches",
+	"CPU-migrations",
+	"minor-faults",
+	"major-faults",
+	"alignment-faults",
+	"emulation-faults",
+};
+
+const char *__perf_evsel__sw_name(u64 config)
+{
+	if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
+		return perf_evsel__sw_names[config];
+	return "unknown-software";
+}
+
+static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config));
+	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
 const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_EVSEL__MAX_ALIASES] = {
  { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
@@ -244,6 +269,10 @@ int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size)
 		ret = perf_evsel__hw_cache_name(evsel, bf, size);
 		break;
 
+	case PERF_TYPE_SOFTWARE:
+		ret = perf_evsel__sw_name(evsel, bf, size);
+		break;
+
 	default:
 		/*
 		 * FIXME
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 5bf946a05a6b..9ae64d9622bc 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -98,6 +98,8 @@ int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
 int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size);
 
 const char *__perf_evsel__hw_name(u64 config);
+const char *__perf_evsel__sw_name(u64 config);
+
 int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size);
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c8f8cf4a6920..641c4ac8a838 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -62,18 +62,6 @@ static struct event_symbol event_symbols[] = {
 #define PERF_EVENT_TYPE(config)		__PERF_EVENT_FIELD(config, TYPE)
 #define PERF_EVENT_ID(config)		__PERF_EVENT_FIELD(config, EVENT)
 
-static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
-	"cpu-clock",
-	"task-clock",
-	"page-faults",
-	"context-switches",
-	"CPU-migrations",
-	"minor-faults",
-	"major-faults",
-	"alignment-faults",
-	"emulation-faults",
-};
-
 #define for_each_subsystem(sys_dir, sys_dirent, sys_next)	       \
 	while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next)	       \
 	if (sys_dirent.d_type == DT_DIR &&				       \
@@ -218,7 +206,8 @@ const char *event_name(struct perf_evsel *evsel)
 	u64 config = evsel->attr.config;
 	int type = evsel->attr.type;
 
-	if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) {
+	if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE ||
+	    type == PERF_TYPE_SOFTWARE || type == PERF_TYPE_HW_CACHE) {
 		/*
  		 * XXX minimal fix, see comment on perf_evsen__name, this static buffer
  		 * will go away together with event_name in the next devel cycle.
@@ -252,9 +241,7 @@ const char *__event_name(int type, u64 config)
 		return buf;
 
 	case PERF_TYPE_SOFTWARE:
-		if (config < PERF_COUNT_SW_MAX && sw_event_names[config])
-			return sw_event_names[config];
-		return "unknown-software";
+		return __perf_evsel__sw_name(config);
 
 	case PERF_TYPE_TRACEPOINT:
 		return tracepoint_id_to_name(config);
-- 
cgit v1.2.3


From a446083604fe2bafe0f46b1e95b22f7b06e3a63f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 12 Jun 2012 10:29:12 -0300
Subject: perf evsel: Handle all event types in perf_evsel__name

Now to convert all event_name users to perf_evsel__name.

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-buuz0j0gynseglxa76r01rdn@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c        | 24 +++++++++++-------------
 tools/perf/util/parse-events.c | 21 +++------------------
 2 files changed, 14 insertions(+), 31 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 2da047331173..2ddc81271855 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -252,6 +252,11 @@ static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t
 	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
 }
 
+static int perf_evsel__tracepoint_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	return scnprintf(bf, size, "%s", evsel->name ?: "unknown tracepoint");
+}
+
 int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size)
 {
 	int ret;
@@ -273,20 +278,13 @@ int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size)
 		ret = perf_evsel__sw_name(evsel, bf, size);
 		break;
 
+	case PERF_TYPE_TRACEPOINT:
+		ret = perf_evsel__tracepoint_name(evsel, bf, size);
+		break;
+
 	default:
-		/*
-		 * FIXME
- 		 *
-		 * This is the minimal perf_evsel__name so that we can
-		 * reconstruct event names taking into account event modifiers.
-		 *
-		 * The old event_name uses it now for raw anr hw events, so that
-		 * we don't drag all the parsing stuff into the python binding.
-		 *
-		 * On the next devel cycle the rest of the event naming will be
-		 * brought here.
- 		 */
-		return 0;
+		ret = scnprintf(bf, size, "%s", "unknown attr type");
+		break;
 	}
 
 	return ret;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 641c4ac8a838..d73690b11242 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -203,24 +203,9 @@ const char *event_type(int type)
 
 const char *event_name(struct perf_evsel *evsel)
 {
-	u64 config = evsel->attr.config;
-	int type = evsel->attr.type;
-
-	if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE ||
-	    type == PERF_TYPE_SOFTWARE || type == PERF_TYPE_HW_CACHE) {
-		/*
- 		 * XXX minimal fix, see comment on perf_evsen__name, this static buffer
- 		 * will go away together with event_name in the next devel cycle.
- 		 */
-		static char bf[128];
-		perf_evsel__name(evsel, bf, sizeof(bf));
-		return bf;
-	}
-
-	if (evsel->name)
-		return evsel->name;
-
-	return __event_name(type, config);
+	static char bf[128];
+	perf_evsel__name(evsel, bf, sizeof(bf));
+	return bf;
 }
 
 const char *__event_name(int type, u64 config)
-- 
cgit v1.2.3


From 7289f83cceb437ca56c77eb45b8b1cda15e2e476 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 12 Jun 2012 12:34:58 -0300
Subject: perf tools: Move all users of event_name to perf_evsel__name

So that we don't use global variables that could make us misreport event
names when having a multi window top, for instance.

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-mccancovi1u0wdkg8ncth509@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-evlist.c    |  2 +-
 tools/perf/builtin-record.c    |  4 ++--
 tools/perf/builtin-report.c    |  6 +++---
 tools/perf/builtin-stat.c      | 12 ++++++------
 tools/perf/builtin-test.c      |  2 +-
 tools/perf/builtin-top.c       | 10 +++++-----
 tools/perf/ui/browsers/hists.c | 15 ++++-----------
 tools/perf/ui/gtk/browser.c    |  2 +-
 tools/perf/util/evsel.c        | 26 +++++++++++++-------------
 tools/perf/util/evsel.h        |  2 +-
 tools/perf/util/header.c       |  2 +-
 tools/perf/util/parse-events.c |  7 -------
 tools/perf/util/parse-events.h |  1 -
 tools/perf/util/session.c      |  2 +-
 tools/perf/util/top.c          |  2 +-
 15 files changed, 40 insertions(+), 55 deletions(-)

diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index acd78dc28341..0dd5a058f766 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -60,7 +60,7 @@ static int __cmd_evlist(const char *input_name, struct perf_attr_details *detail
 	list_for_each_entry(pos, &session->evlist->entries, node) {
 		bool first = true;
 
-		printf("%s", event_name(pos));
+		printf("%s", perf_evsel__name(pos));
 
 		if (details->verbose || details->freq) {
 			comma_printf(&first, " sample_freq=%" PRIu64,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f95840d04e4c..f5a6452931e6 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -265,7 +265,7 @@ try_again:
 
 			if (err == ENOENT) {
 				ui__error("The %s event is not supported.\n",
-					    event_name(pos));
+					  perf_evsel__name(pos));
 				exit(EXIT_FAILURE);
 			}
 
@@ -916,7 +916,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
 		usage_with_options(record_usage, record_options);
 
 	list_for_each_entry(pos, &evsel_list->entries, node) {
-		if (perf_header__push_event(pos->attr.config, event_name(pos)))
+		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
 			goto out_free_fd;
 	}
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 25249f76329d..ea8ce8e1c0d8 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -230,7 +230,7 @@ static int process_read_event(struct perf_tool *tool,
 	struct perf_report *rep = container_of(tool, struct perf_report, tool);
 
 	if (rep->show_threads) {
-		const char *name = evsel ? event_name(evsel) : "unknown";
+		const char *name = evsel ? perf_evsel__name(evsel) : "unknown";
 		perf_read_values_add_value(&rep->show_threads_values,
 					   event->read.pid, event->read.tid,
 					   event->read.id,
@@ -239,7 +239,7 @@ static int process_read_event(struct perf_tool *tool,
 	}
 
 	dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid,
-		    evsel ? event_name(evsel) : "FAIL",
+		    evsel ? perf_evsel__name(evsel) : "FAIL",
 		    event->read.value);
 
 	return 0;
@@ -314,7 +314,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 
 	list_for_each_entry(pos, &evlist->entries, node) {
 		struct hists *hists = &pos->hists;
-		const char *evname = event_name(pos);
+		const char *evname = perf_evsel__name(pos);
 
 		hists__fprintf_nr_sample_events(hists, evname, stdout);
 		hists__fprintf(hists, NULL, false, true, 0, 0, stdout);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 262589991ea4..875bf2675326 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -391,7 +391,7 @@ static int read_counter_aggr(struct perf_evsel *counter)
 
 	if (verbose) {
 		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-			event_name(counter), count[0], count[1], count[2]);
+			perf_evsel__name(counter), count[0], count[1], count[2]);
 	}
 
 	/*
@@ -496,7 +496,7 @@ static int run_perf_stat(int argc __used, const char **argv)
 			    errno == ENXIO) {
 				if (verbose)
 					ui__warning("%s event is not supported by the kernel.\n",
-						    event_name(counter));
+						    perf_evsel__name(counter));
 				counter->supported = false;
 				continue;
 			}
@@ -594,7 +594,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
 			csv_output ? 0 : -4,
 			evsel_list->cpus->map[cpu], csv_sep);
 
-	fprintf(output, fmt, cpustr, msecs, csv_sep, event_name(evsel));
+	fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));
 
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -792,7 +792,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 	else
 		cpu = 0;
 
-	fprintf(output, fmt, cpustr, avg, csv_sep, event_name(evsel));
+	fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel));
 
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -908,7 +908,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
 			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 			csv_sep,
 			csv_output ? 0 : -24,
-			event_name(counter));
+			perf_evsel__name(counter));
 
 		if (counter->cgrp)
 			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
@@ -961,7 +961,7 @@ static void print_counter(struct perf_evsel *counter)
 				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 				csv_sep,
 				csv_output ? 0 : -24,
-				event_name(counter));
+				perf_evsel__name(counter));
 
 			if (counter->cgrp)
 				fprintf(output, "%s%s",
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 5a8727c08757..5ce30305462b 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -583,7 +583,7 @@ static int test__basic_mmap(void)
 		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
 			pr_debug("expected %d %s events, got %d\n",
 				 expected_nr_events[evsel->idx],
-				 event_name(evsel), nr_events[evsel->idx]);
+				 perf_evsel__name(evsel), nr_events[evsel->idx]);
 			goto out_munmap;
 		}
 	}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6bb0277b7dfe..8090a280578c 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -245,7 +245,7 @@ static void perf_top__show_details(struct perf_top *top)
 	if (notes->src == NULL)
 		goto out_unlock;
 
-	printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
+	printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
 	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
 
 	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
@@ -408,7 +408,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top)
 	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
 
 	if (top->evlist->nr_entries > 1)
-		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top->sym_evsel));
+		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", perf_evsel__name(top->sym_evsel));
 
 	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
 
@@ -503,13 +503,13 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)
 				fprintf(stderr, "\nAvailable events:");
 
 				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
-					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
+					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
 
 				prompt_integer(&counter, "Enter details event counter");
 
 				if (counter >= top->evlist->nr_entries) {
 					top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
-					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
+					fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
 					sleep(1);
 					break;
 				}
@@ -960,7 +960,7 @@ try_again:
 
 			if (err == ENOENT) {
 				ui__error("The %s event is not supported.\n",
-					    event_name(counter));
+					  perf_evsel__name(counter));
 				goto out_err;
 			} else if (err == EMFILE) {
 				ui__error("Too many events are opened.\n"
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f556e5f6388b..482f0517b61e 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1367,7 +1367,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
 	struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
 	bool current_entry = ui_browser__is_current_entry(browser, row);
 	unsigned long nr_events = evsel->hists.stats.nr_events[PERF_RECORD_SAMPLE];
-	const char *ev_name = event_name(evsel);
+	const char *ev_name = perf_evsel__name(evsel);
 	char bf[256], unit;
 	const char *warn = " ";
 	size_t printed;
@@ -1435,7 +1435,7 @@ browse_hists:
 			 */
 			if (timer)
 				timer(arg);
-			ev_name = event_name(pos);
+			ev_name = perf_evsel__name(pos);
 			key = perf_evsel__hists_browse(pos, nr_events, help,
 						       ev_name, true, timer,
 						       arg, delay_secs);
@@ -1504,17 +1504,11 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 	ui_helpline__push("Press ESC to exit");
 
 	list_for_each_entry(pos, &evlist->entries, node) {
-		const char *ev_name = event_name(pos);
+		const char *ev_name = perf_evsel__name(pos);
 		size_t line_len = strlen(ev_name) + 7;
 
 		if (menu.b.width < line_len)
 			menu.b.width = line_len;
-		/*
-		 * Cache the evsel name, tracepoints have a _high_ cost per
-		 * event_name() call.
-		 */
-		if (pos->name == NULL)
-			pos->name = strdup(ev_name);
 	}
 
 	return perf_evsel_menu__run(&menu, evlist->nr_entries, help, timer,
@@ -1525,11 +1519,10 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  void(*timer)(void *arg), void *arg,
 				  int delay_secs)
 {
-
 	if (evlist->nr_entries == 1) {
 		struct perf_evsel *first = list_entry(evlist->entries.next,
 						      struct perf_evsel, node);
-		const char *ev_name = event_name(first);
+		const char *ev_name = perf_evsel__name(first);
 		return perf_evsel__hists_browse(first, evlist->nr_entries, help,
 						ev_name, false, timer, arg,
 						delay_secs);
diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
index fd41e8d10337..ec12e0b4ded6 100644
--- a/tools/perf/ui/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -199,7 +199,7 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 
 	list_for_each_entry(pos, &evlist->entries, node) {
 		struct hists *hists = &pos->hists;
-		const char *evname = event_name(pos);
+		const char *evname = perf_evsel__name(pos);
 		GtkWidget *scrolled_window;
 		GtkWidget *tab_label;
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 2ddc81271855..236bdf25db6d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -252,42 +252,42 @@ static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t
 	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
 }
 
-static int perf_evsel__tracepoint_name(struct perf_evsel *evsel, char *bf, size_t size)
+const char *perf_evsel__name(struct perf_evsel *evsel)
 {
-	return scnprintf(bf, size, "%s", evsel->name ?: "unknown tracepoint");
-}
+	char bf[128];
 
-int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size)
-{
-	int ret;
+	if (evsel->name)
+		return evsel->name;
 
 	switch (evsel->attr.type) {
 	case PERF_TYPE_RAW:
-		ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
+		scnprintf(bf, sizeof(bf), "raw 0x%" PRIx64, evsel->attr.config);
 		break;
 
 	case PERF_TYPE_HARDWARE:
-		ret = perf_evsel__hw_name(evsel, bf, size);
+		perf_evsel__hw_name(evsel, bf, sizeof(bf));
 		break;
 
 	case PERF_TYPE_HW_CACHE:
-		ret = perf_evsel__hw_cache_name(evsel, bf, size);
+		perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
 		break;
 
 	case PERF_TYPE_SOFTWARE:
-		ret = perf_evsel__sw_name(evsel, bf, size);
+		perf_evsel__sw_name(evsel, bf, sizeof(bf));
 		break;
 
 	case PERF_TYPE_TRACEPOINT:
-		ret = perf_evsel__tracepoint_name(evsel, bf, size);
+		scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
 		break;
 
 	default:
-		ret = scnprintf(bf, size, "%s", "unknown attr type");
+		scnprintf(bf, sizeof(bf), "%s", "unknown attr type");
 		break;
 	}
 
-	return ret;
+	evsel->name = strdup(bf);
+
+	return evsel->name ?: "unknown";
 }
 
 void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts,
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 9ae64d9622bc..c936feb4e0a6 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -100,7 +100,7 @@ int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size);
 const char *__perf_evsel__hw_name(u64 config);
 const char *__perf_evsel__sw_name(u64 config);
 
-int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size);
+const char *perf_evsel__name(struct perf_evsel *evsel);
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 2dd5edf161b7..07c8f3792954 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -641,7 +641,7 @@ static int write_event_desc(int fd, struct perf_header *h __used,
 		/*
 		 * write event string as passed on cmdline
 		 */
-		ret = do_write_string(fd, event_name(attr));
+		ret = do_write_string(fd, perf_evsel__name(attr));
 		if (ret < 0)
 			return ret;
 		/*
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index d73690b11242..517e6473982c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -201,13 +201,6 @@ const char *event_type(int type)
 	return "unknown";
 }
 
-const char *event_name(struct perf_evsel *evsel)
-{
-	static char bf[128];
-	perf_evsel__name(evsel, bf, sizeof(bf));
-	return bf;
-}
-
 const char *__event_name(int type, u64 config)
 {
 	static char buf[32];
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 8cac57ab4ee6..d7eb070937c4 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -26,7 +26,6 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
 extern bool have_tracepoints(struct list_head *evlist);
 
 const char *event_type(int type);
-const char *event_name(struct perf_evsel *event);
 extern const char *__event_name(int type, u64 config);
 
 extern int parse_events_option(const struct option *opt, const char *str,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 2600916efa83..582ee38ed216 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1439,7 +1439,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
 	ret += hists__fprintf_nr_events(&session->hists, fp);
 
 	list_for_each_entry(pos, &session->evlist->entries, node) {
-		ret += fprintf(fp, "%s stats:\n", event_name(pos));
+		ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
 		ret += hists__fprintf_nr_events(&pos->hists, fp);
 	}
 
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index abe0e8e95068..7eeebcee291c 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -65,7 +65,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
 				top->freq ? "Hz" : "");
 	}
 
-	ret += SNPRINTF(bf + ret, size - ret, "%s", event_name(top->sym_evsel));
+	ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel));
 
 	ret += SNPRINTF(bf + ret, size - ret, "], ");
 
-- 
cgit v1.2.3


From 5bff01f66db1753abcae03a06b21e56e7e9d9fa9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 12 Jun 2012 13:35:44 -0300
Subject: perf script: Replace __event_name uses with perf_evsel__name

No logic change, just remove one more user of __event_name().

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-e4f0vuy3283hmzfjjvkgm7fo@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 8e395a538eb9..8f9f9b6140db 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -137,10 +137,11 @@ static const char *output_field2str(enum perf_output_field field)
 
 #define PRINT_FIELD(x)  (output[attr->type].fields & PERF_OUTPUT_##x)
 
-static int perf_event_attr__check_stype(struct perf_event_attr *attr,
-				  u64 sample_type, const char *sample_msg,
-				  enum perf_output_field field)
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+				   u64 sample_type, const char *sample_msg,
+				   enum perf_output_field field)
 {
+	struct perf_event_attr *attr = &evsel->attr;
 	int type = attr->type;
 	const char *evname;
 
@@ -148,7 +149,7 @@ static int perf_event_attr__check_stype(struct perf_event_attr *attr,
 		return 0;
 
 	if (output[type].user_set) {
-		evname = __event_name(attr->type, attr->config);
+		evname = perf_evsel__name(evsel);
 		pr_err("Samples for '%s' event do not have %s attribute set. "
 		       "Cannot print '%s' field.\n",
 		       evname, sample_msg, output_field2str(field));
@@ -157,7 +158,7 @@ static int perf_event_attr__check_stype(struct perf_event_attr *attr,
 
 	/* user did not ask for it explicitly so remove from the default list */
 	output[type].fields &= ~field;
-	evname = __event_name(attr->type, attr->config);
+	evname = perf_evsel__name(evsel);
 	pr_debug("Samples for '%s' event do not have %s attribute set. "
 		 "Skipping '%s' field.\n",
 		 evname, sample_msg, output_field2str(field));
@@ -175,8 +176,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		return -EINVAL;
 
 	if (PRINT_FIELD(IP)) {
-		if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP",
-					   PERF_OUTPUT_IP))
+		if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP",
+					    PERF_OUTPUT_IP))
 			return -EINVAL;
 
 		if (!no_callchain &&
@@ -185,8 +186,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 	}
 
 	if (PRINT_FIELD(ADDR) &&
-		perf_event_attr__check_stype(attr, PERF_SAMPLE_ADDR, "ADDR",
-				       PERF_OUTPUT_ADDR))
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
+					PERF_OUTPUT_ADDR))
 		return -EINVAL;
 
 	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
@@ -208,18 +209,18 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 	}
 
 	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
-		perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID",
-				       PERF_OUTPUT_TID|PERF_OUTPUT_PID))
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
+					PERF_OUTPUT_TID|PERF_OUTPUT_PID))
 		return -EINVAL;
 
 	if (PRINT_FIELD(TIME) &&
-		perf_event_attr__check_stype(attr, PERF_SAMPLE_TIME, "TIME",
-				       PERF_OUTPUT_TIME))
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME",
+					PERF_OUTPUT_TIME))
 		return -EINVAL;
 
 	if (PRINT_FIELD(CPU) &&
-		perf_event_attr__check_stype(attr, PERF_SAMPLE_CPU, "CPU",
-				       PERF_OUTPUT_CPU))
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
+					PERF_OUTPUT_CPU))
 		return -EINVAL;
 
 	return 0;
@@ -258,9 +259,10 @@ static int perf_session__check_output_opt(struct perf_session *session)
 
 static void print_sample_start(struct perf_sample *sample,
 			       struct thread *thread,
-			       struct perf_event_attr *attr)
+			       struct perf_evsel *evsel)
 {
 	int type;
+	struct perf_event_attr *attr = &evsel->attr;
 	struct event_format *event;
 	const char *evname = NULL;
 	unsigned long secs;
@@ -305,7 +307,7 @@ static void print_sample_start(struct perf_sample *sample,
 			if (event)
 				evname = event->name;
 		} else
-			evname = __event_name(attr->type, attr->config);
+			evname = perf_evsel__name(evsel);
 
 		printf("%s: ", evname ? evname : "[unknown]");
 	}
@@ -412,7 +414,7 @@ static void process_event(union perf_event *event __unused,
 	if (output[attr->type].fields == 0)
 		return;
 
-	print_sample_start(sample, thread, attr);
+	print_sample_start(sample, thread, evsel);
 
 	if (is_bts_event(attr)) {
 		print_sample_bts(event, sample, evsel, machine, thread);
-- 
cgit v1.2.3


From 22c8b84320ecf9ecbb6587d46a259b828e9ece5e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 12 Jun 2012 13:55:13 -0300
Subject: perf tools: Don't access evsel->name directly

One needs to use perf_evsel__name() so that if needed the name gets
synthesized and stored in evsel->name, from where perf_evsel__name()
will serve from them on.

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-ml7zbenjmri9bghmrea0jm0d@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-sched.c          | 2 +-
 tools/perf/util/parse-events-test.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index b125e07eb399..9fe77b185338 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1601,7 +1601,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
 
 	if (thread == NULL) {
 		pr_debug("problem processing %s event, skipping it.\n",
-			 evsel->name);
+			 perf_evsel__name(evsel));
 		return -1;
 	}
 
diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c
index 76b98e2a587d..d0cf7c1ed068 100644
--- a/tools/perf/util/parse-events-test.c
+++ b/tools/perf/util/parse-events-test.c
@@ -418,14 +418,14 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
 	TEST_ASSERT_VAL("wrong config",  1 == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong name", !strcmp(evsel->name, "krava"));
+	TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava"));
 
 	/* cpu/config=2/" */
 	evsel = list_entry(evsel->node.next, struct perf_evsel, node);
 	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
 	TEST_ASSERT_VAL("wrong config",  2 == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong name", !strcmp(evsel->name, "raw 0x2"));
+	TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "raw 0x2"));
 
 	return 0;
 }
-- 
cgit v1.2.3


From 9db1763c72b1548626ae9d634015de4f07ef624f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 12 Jun 2012 13:45:00 -0300
Subject: perf tools: Remove __event_name

Not needed anymore, the parsing code can just leave evsel->name as NULL
and the first call to perf_evsel__name() will do exactly what was being
pre-cached using __event_name().

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-cn2eiijcinnc97buod8cs34m@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 61 +++++-------------------------------------
 tools/perf/util/parse-events.h |  1 -
 2 files changed, 6 insertions(+), 56 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 517e6473982c..eacf932a36a0 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -161,24 +161,6 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
 	return NULL;
 }
 
-#define TP_PATH_LEN (MAX_EVENT_LENGTH * 2 + 1)
-static const char *tracepoint_id_to_name(u64 config)
-{
-	static char buf[TP_PATH_LEN];
-	struct tracepoint_path *path;
-
-	path = tracepoint_id_to_path(config);
-	if (path) {
-		snprintf(buf, TP_PATH_LEN, "%s:%s", path->system, path->name);
-		free(path->name);
-		free(path->system);
-		free(path);
-	} else
-		snprintf(buf, TP_PATH_LEN, "%s:%s", "unknown", "unknown");
-
-	return buf;
-}
-
 const char *event_type(int type)
 {
 	switch (type) {
@@ -201,36 +183,6 @@ const char *event_type(int type)
 	return "unknown";
 }
 
-const char *__event_name(int type, u64 config)
-{
-	static char buf[32];
-
-	if (type == PERF_TYPE_RAW) {
-		sprintf(buf, "raw 0x%" PRIx64, config);
-		return buf;
-	}
-
-	switch (type) {
-	case PERF_TYPE_HARDWARE:
-		return __perf_evsel__hw_name(config);
-
-	case PERF_TYPE_HW_CACHE:
-		__perf_evsel__hw_cache_name(config, buf, sizeof(buf));
-		return buf;
-
-	case PERF_TYPE_SOFTWARE:
-		return __perf_evsel__sw_name(config);
-
-	case PERF_TYPE_TRACEPOINT:
-		return tracepoint_id_to_name(config);
-
-	default:
-		break;
-	}
-
-	return "unknown";
-}
-
 static int add_event(struct list_head **_list, int *idx,
 		     struct perf_event_attr *attr, char *name)
 {
@@ -252,7 +204,8 @@ static int add_event(struct list_head **_list, int *idx,
 		return -ENOMEM;
 	}
 
-	evsel->name = strdup(name);
+	if (name)
+		evsel->name = strdup(name);
 	list_add_tail(&evsel->node, list);
 	*_list = list;
 	return 0;
@@ -545,8 +498,7 @@ int parse_events_add_numeric(struct list_head **list, int *idx,
 	    config_attr(&attr, head_config, 1))
 		return -EINVAL;
 
-	return add_event(list, idx, &attr,
-			 (char *) __event_name(type, config));
+	return add_event(list, idx, &attr, NULL);
 }
 
 static int parse_events__is_name_term(struct parse_events__term *term)
@@ -554,8 +506,7 @@ static int parse_events__is_name_term(struct parse_events__term *term)
 	return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
 }
 
-static char *pmu_event_name(struct perf_event_attr *attr,
-			    struct list_head *head_terms)
+static char *pmu_event_name(struct list_head *head_terms)
 {
 	struct parse_events__term *term;
 
@@ -563,7 +514,7 @@ static char *pmu_event_name(struct perf_event_attr *attr,
 		if (parse_events__is_name_term(term))
 			return term->val.str;
 
-	return (char *) __event_name(PERF_TYPE_RAW, attr->config);
+	return NULL;
 }
 
 int parse_events_add_pmu(struct list_head **list, int *idx,
@@ -588,7 +539,7 @@ int parse_events_add_pmu(struct list_head **list, int *idx,
 		return -EINVAL;
 
 	return add_event(list, idx, &attr,
-			 pmu_event_name(&attr, head_config));
+			 pmu_event_name(head_config));
 }
 
 void parse_events_update_lists(struct list_head *list_event,
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index d7eb070937c4..1784f06e3a6a 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -26,7 +26,6 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
 extern bool have_tracepoints(struct list_head *evlist);
 
 const char *event_type(int type);
-extern const char *__event_name(int type, u64 config);
 
 extern int parse_events_option(const struct option *opt, const char *str,
 			       int unset);
-- 
cgit v1.2.3


From 6eef3d9c2bcf52b7a3c18e609f5838c007b989a4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 13 Jun 2012 11:53:37 -0300
Subject: perf evsel: Reconstruct raw event with modifiers from perf_event_attr

I forgot to add the modifiers to raw events too, fix it.

Reported-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-pi267j1aqqjti9rqh9qy4g58@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 236bdf25db6d..8f0e9dd03775 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -252,6 +252,12 @@ static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t
 	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
 }
 
+static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
+	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
 const char *perf_evsel__name(struct perf_evsel *evsel)
 {
 	char bf[128];
@@ -261,7 +267,7 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
 
 	switch (evsel->attr.type) {
 	case PERF_TYPE_RAW:
-		scnprintf(bf, sizeof(bf), "raw 0x%" PRIx64, evsel->attr.config);
+		perf_evsel__raw_name(evsel, bf, sizeof(bf));
 		break;
 
 	case PERF_TYPE_HARDWARE:
-- 
cgit v1.2.3


From a9c34a9f9c677fcbe06bd3eda8d6caa3487b4a65 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 11 Jun 2012 15:20:03 +0200
Subject: perf tools: Remove unused evsel parameter from
 machine__resolve_callchain

Removing unused evsel parameter from machine__resolve_callchain
function. Plus related header file and callers changes.

The evsel parameter is unused since following commit:
  perf callchain: Make callchain cursors TLS
  commit 472606458f3e1ced5fe3cc5f04e90a6b5a4732cf
  Author: Namhyung Kim <namhyung.kim@lge.com>
  Date:   Thu May 31 14:43:26 2012 +0900

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1339420814-7379-9-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-report.c | 4 ++--
 tools/perf/builtin-script.c | 4 ++--
 tools/perf/builtin-top.c    | 2 +-
 tools/perf/util/map.h       | 2 +-
 tools/perf/util/session.c   | 7 +++----
 tools/perf/util/session.h   | 4 ++--
 6 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index ea8ce8e1c0d8..40b0ffc3ad3b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -69,7 +69,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 
 	if ((sort__has_parent || symbol_conf.use_callchain)
 	    && sample->callchain) {
-		err = machine__resolve_callchain(machine, evsel, al->thread,
+		err = machine__resolve_callchain(machine, al->thread,
 						 sample->callchain, &parent);
 		if (err)
 			return err;
@@ -140,7 +140,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 	struct hist_entry *he;
 
 	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
-		err = machine__resolve_callchain(machine, evsel, al->thread,
+		err = machine__resolve_callchain(machine, al->thread,
 						 sample->callchain, &parent);
 		if (err)
 			return err;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 8f9f9b6140db..8fecd3b8130a 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -389,7 +389,7 @@ static void print_sample_bts(union perf_event *event,
 			printf(" ");
 		else
 			printf("\n");
-		perf_event__print_ip(event, sample, machine, evsel,
+		perf_event__print_ip(event, sample, machine,
 				     PRINT_FIELD(SYM), PRINT_FIELD(DSO),
 				     PRINT_FIELD(SYMOFFSET));
 	}
@@ -433,7 +433,7 @@ static void process_event(union perf_event *event __unused,
 			printf(" ");
 		else
 			printf("\n");
-		perf_event__print_ip(event, sample, machine, evsel,
+		perf_event__print_ip(event, sample, machine,
 				     PRINT_FIELD(SYM), PRINT_FIELD(DSO),
 				     PRINT_FIELD(SYMOFFSET));
 	}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8090a280578c..e3cab5f088f8 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -774,7 +774,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 
 		if ((sort__has_parent || symbol_conf.use_callchain) &&
 		    sample->callchain) {
-			err = machine__resolve_callchain(machine, evsel, al.thread,
+			err = machine__resolve_callchain(machine, al.thread,
 							 sample->callchain, &parent);
 			if (err)
 				return;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 81371bad4ef0..c14c665d9a25 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -157,7 +157,7 @@ void machine__exit(struct machine *self);
 void machine__delete(struct machine *self);
 
 int machine__resolve_callchain(struct machine *machine,
-			       struct perf_evsel *evsel, struct thread *thread,
+			       struct thread *thread,
 			       struct ip_callchain *chain,
 			       struct symbol **parent);
 int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 582ee38ed216..febc0aeb3c66 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -289,7 +289,6 @@ struct branch_info *machine__resolve_bstack(struct machine *self,
 }
 
 int machine__resolve_callchain(struct machine *self,
-			       struct perf_evsel *evsel __used,
 			       struct thread *thread,
 			       struct ip_callchain *chain,
 			       struct symbol **parent)
@@ -1480,8 +1479,8 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 }
 
 void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
-			  struct machine *machine, struct perf_evsel *evsel,
-			  int print_sym, int print_dso, int print_symoffset)
+			  struct machine *machine, int print_sym,
+			  int print_dso, int print_symoffset)
 {
 	struct addr_location al;
 	struct callchain_cursor_node *node;
@@ -1495,7 +1494,7 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
 
 	if (symbol_conf.use_callchain && sample->callchain) {
 
-		if (machine__resolve_callchain(machine, evsel, al.thread,
+		if (machine__resolve_callchain(machine, al.thread,
 						sample->callchain, NULL) != 0) {
 			if (verbose)
 				error("Failed to resolve callchain. Skipping\n");
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 7a5434c00565..877d78186f2c 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -150,8 +150,8 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 					    unsigned int type);
 
 void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
-			  struct machine *machine, struct perf_evsel *evsel,
-			  int print_sym, int print_dso, int print_symoffset);
+			  struct machine *machine, int print_sym,
+			  int print_dso, int print_symoffset);
 
 int perf_session__cpu_bitmap(struct perf_session *session,
 			     const char *cpu_list, unsigned long *cpu_bitmap);
-- 
cgit v1.2.3


From dd4f52232c60bcb41205a67d2df2ac0ecdfe3683 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 13 Jun 2012 15:52:42 -0300
Subject: perf evsel: Make some methods private

Now that __event_name is gone, no need to export __perf_evsel__[hs]w_name().

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-rpjnarbt83nu9uowrfatmy12@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 6 +++---
 tools/perf/util/evsel.h | 5 -----
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8f0e9dd03775..876f639d69ed 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -78,7 +78,7 @@ static const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
 	"ref-cycles",
 };
 
-const char *__perf_evsel__hw_name(u64 config)
+static const char *__perf_evsel__hw_name(u64 config)
 {
 	if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
 		return perf_evsel__hw_names[config];
@@ -140,7 +140,7 @@ static const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
 	"emulation-faults",
 };
 
-const char *__perf_evsel__sw_name(u64 config)
+static const char *__perf_evsel__sw_name(u64 config)
 {
 	if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
 		return perf_evsel__sw_names[config];
@@ -219,7 +219,7 @@ int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
 			 perf_evsel__hw_cache_op[op][1]);
 }
 
-int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
+static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
 {
 	u8 op, result, type = (config >>  0) & 0xff;
 	const char *err = "unknown-ext-hardware-cache-type";
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index c936feb4e0a6..67cc5033d192 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -95,11 +95,6 @@ const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
 				       [PERF_EVSEL__MAX_ALIASES];
 int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
 					    char *bf, size_t size);
-int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size);
-
-const char *__perf_evsel__hw_name(u64 config);
-const char *__perf_evsel__sw_name(u64 config);
-
 const char *perf_evsel__name(struct perf_evsel *evsel);
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
-- 
cgit v1.2.3


From c0a58fb2bdf033df433cad9009c7dac4c6b872b0 Mon Sep 17 00:00:00 2001
From: Samuel Liao <samuelliao@tencent.com>
Date: Tue, 5 Jun 2012 13:14:59 +0800
Subject: perf annotate: Check null of sym pointer before using it

Sym may be NULL, and that will cause perf to crash.

Signed-off-by: Shan Wei <davidshan@tencent.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/4FCD95D3.90209@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/annotate.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 34b1c46eaf42..67a2703e666a 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -814,7 +814,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
 {
 	struct disasm_line *pos, *n;
 	struct annotation *notes;
-	const size_t size = symbol__size(sym);
+	size_t size;
 	struct map_symbol ms = {
 		.map = map,
 		.sym = sym,
@@ -834,6 +834,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
 	if (sym == NULL)
 		return -1;
 
+	size = symbol__size(sym);
+
 	if (map->dso->annotate_warned)
 		return -1;
 
-- 
cgit v1.2.3


From 0718467c859f5571dc48d294596f841096f6a47a Mon Sep 17 00:00:00 2001
From: Li Zhong <zhong@linux.vnet.ibm.com>
Date: Mon, 18 Jun 2012 15:56:33 -0400
Subject: x86/nmi: Clean up register_nmi_handler() usage

Implement a cleaner and easier to maintain version for the section
warning fixes implemented in commit eeaaa96a3a21
("x86/nmi: Fix section mismatch warnings on 32-bit").

Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Jan Beulich <JBeulich@suse.com>
Link: http://lkml.kernel.org/r/1340049393-17771-1-git-send-email-dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/nmi.h     | 20 +++-----------------
 arch/x86/kernel/nmi_selftest.c |  7 ++++---
 2 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index dc580c42851c..c0fa356e90de 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -44,28 +44,14 @@ struct nmiaction {
 	const char		*name;
 };
 
-#define register_nmi_handler(t, fn, fg, n)		\
+#define register_nmi_handler(t, fn, fg, n, init...)	\
 ({							\
-	static struct nmiaction fn##_na = {		\
+	static struct nmiaction init fn##_na = {	\
 		.handler = (fn),			\
 		.name = (n),				\
 		.flags = (fg),				\
 	};						\
-	__register_nmi_handler((t), &fn##_na);	\
-})
-
-/*
- * For special handlers that register/unregister in the
- * init section only.  This should be considered rare.
- */
-#define register_nmi_handler_initonly(t, fn, fg, n)		\
-({							\
-	static struct nmiaction fn##_na __initdata = {		\
-		.handler = (fn),			\
-		.name = (n),				\
-		.flags = (fg),				\
-	};						\
-	__register_nmi_handler((t), &fn##_na);	\
+	__register_nmi_handler((t), &fn##_na);		\
 })
 
 int __register_nmi_handler(unsigned int, struct nmiaction *);
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 149b8d9c6ad4..6d9582ec0324 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -42,7 +42,8 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
 static void __init init_nmi_testsuite(void)
 {
 	/* trap all the unknown NMIs we may generate */
-	register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
+	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk",
+			__initdata);
 }
 
 static void __init cleanup_nmi_testsuite(void)
@@ -64,8 +65,8 @@ static void __init test_nmi_ipi(struct cpumask *mask)
 {
 	unsigned long timeout;
 
-	if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback,
-				 NMI_FLAG_FIRST, "nmi_selftest")) {
+	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
+				 NMI_FLAG_FIRST, "nmi_selftest", __initdata)) {
 		nmi_fail = FAILURE;
 		return;
 	}
-- 
cgit v1.2.3


From e1b6fc55da40bc17e20795901cb786e3619f9be9 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 18 Jun 2012 11:28:45 +0100
Subject: x86/microcode: Mark microcode_id[] as __initconst

It's not being used for other than creating module aliases (i.e.
no loadable section has any reference to it).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/4FDF1EFD020000780008A65D@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/microcode_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fbdfc6917180..c383b3f8f397 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -504,7 +504,7 @@ static struct notifier_block __refdata mc_cpu_notifier = {
 
 #ifdef MODULE
 /* Autoload on Intel and AMD systems */
-static const struct x86_cpu_id microcode_id[] = {
+static const struct x86_cpu_id __initconst microcode_id[] = {
 #ifdef CONFIG_MICROCODE_INTEL
 	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
 #endif
-- 
cgit v1.2.3


From 0d26d1d873a302828e064737746c53a2689e6c0f Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 18 Jun 2012 11:30:20 +0100
Subject: x86/mm: Mark free_initrd_mem() as __init

... matching various other architectures.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/4FDF1F5C020000780008A661@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bc4e9d84157f..e0e6990723e9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -385,7 +385,7 @@ void free_initmem(void)
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
+void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
 	/*
 	 * end could be not aligned, and We can not align that,
-- 
cgit v1.2.3


From 0fa0e2f02e8edfbdb5f86d1cab0fa6dc0517489f Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 18 Jun 2012 11:40:04 +0100
Subject: x86: Move call to print_modules() out of show_regs()

Printing the list of loaded modules is really unrelated to what
this function is about, and is particularly unnecessary in the
context of the SysRQ key handling (gets printed so far over and
over).

It should really be the caller of the function to decide whether
this piece of information is useful (and to avoid redundantly
printing it).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/4FDF21A4020000780008A67F@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/dumpstack.c    | 1 +
 arch/x86/kernel/dumpstack_32.c | 1 -
 arch/x86/kernel/dumpstack_64.c | 1 -
 3 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 87d3b5d663cd..ae42418bc50f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -271,6 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
 		return 1;
 
+	print_modules();
 	show_regs(regs);
 #ifdef CONFIG_X86_32
 	if (user_mode_vm(regs)) {
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 3a8aced11ae9..1038a417ea53 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -86,7 +86,6 @@ void show_regs(struct pt_regs *regs)
 {
 	int i;
 
-	print_modules();
 	__show_regs(regs, !user_mode_vm(regs));
 
 	pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c582e9c5bd1a..b653675d5288 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -254,7 +254,6 @@ void show_regs(struct pt_regs *regs)
 
 	sp = regs->sp;
 	printk("CPU %d ", cpu);
-	print_modules();
 	__show_regs(regs, 1);
 	printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n",
 	       cur->comm, cur->pid, task_thread_info(cur), cur);
-- 
cgit v1.2.3


From 2b1b712f050eaf0ac576591281446dc960c0afc5 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 20 Jun 2012 21:18:14 -0700
Subject: x86, reboot: Drop redundant write of reboot_mode

We write reboot_mode to BIOS location 0x472 in
native_machine_emergency_restart() (reboot.c:542) already, there is no
need to then write it again in machine_real_restart().

This means nothing gets written there for MRR_APM, but the APM call is
a poweroff call and doesn't use this memory location.

Link: http://lkml.kernel.org/n/tip-3i0pfh44c1e3jv5lab0cf7sc@git.kernel.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/reboot.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 6ddb9cd0ced0..6ef559f09acd 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -179,14 +179,6 @@ void __noreturn machine_real_restart(unsigned int type)
 	write_cr3(real_mode_header->trampoline_pgd);
 #endif
 
-	/*
-	 * Write 0x1234 to absolute memory location 0x472.  The BIOS reads
-	 * this on booting to tell it to "Bypass memory test (also warm
-	 * boot)".  This seems like a fairly standard thing that gets set by
-	 * REBOOT.COM programs, and the previous reset routine did this
-	 * too. */
-	*((unsigned short *)0x472) = reboot_mode;
-
 	/* Jump to the identity-mapped low memory code */
 #ifdef CONFIG_X86_32
 	asm volatile("jmpl *%0" : :
-- 
cgit v1.2.3


From 9751d7627582fc1cc64625d63bde9528c14f1544 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 21 Jun 2012 10:25:03 -0700
Subject: x86-64, reboot: Be more paranoid in 64-bit reboot=bios

Be a bit more paranoid in the transition back to 16-bit mode.  In
particular, in case the kernel is residing above the 4 GiB mark,
switch to the trampoline GDT, and make the jump after turning off
paging a far jump.  In theory, none of this should matter, but it is
exactly the kind of things that broken SMM or virtualization software
could trip up on.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Link: http://lkml.kernel.org/r/tip-jopx7y6g6dbcx4tpal8q0jlr@git.kernel.org
---
 arch/x86/realmode/rm/reboot.S | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S
index 6bf8feac5557..f932ea61d1c8 100644
--- a/arch/x86/realmode/rm/reboot.S
+++ b/arch/x86/realmode/rm/reboot.S
@@ -22,14 +22,18 @@
 ENTRY(machine_real_restart_asm)
 
 #ifdef CONFIG_X86_64
+	/* Switch to trampoline GDT as it is guaranteed < 4 GiB */
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	lgdtl	pa_tr_gdt
 
 	/* Disable paging to drop us out of long mode */
 	movl	%cr0, %eax
 	andl	$~X86_CR0_PG, %eax
 	movl	%eax, %cr0
-	jmp	1f	/* "A branch" may be needed here, assume near is OK */
+	ljmpl	$__KERNEL32_CS, $pa_machine_real_restart_paging_off
 
-1:
+GLOBAL(machine_real_restart_paging_off)
 	xorl	%eax, %eax
 	xorl	%edx, %edx
 	movl	$MSR_EFER, %ecx
-- 
cgit v1.2.3


From f8bbfd7d28303967ca4e8597de9bdc9bf8b197e7 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 23 Feb 2012 17:07:06 +0100
Subject: oprofile, perf: Use per-cpu framework

This changes oprofile_perf.c to use the per-cpu framework.

Using the per-cpu framework should avoid error like the following:

 arch/arm/oprofile/../../../drivers/oprofile/oprofile_perf.c:28:28: error: variably modified 'perf_events' at file scope

Reported-by: William Cohen <wcohen@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 drivers/oprofile/oprofile_perf.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c
index efc4b7f308cf..f3cfa0b9adfa 100644
--- a/drivers/oprofile/oprofile_perf.c
+++ b/drivers/oprofile/oprofile_perf.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2010 ARM Ltd.
+ * Copyright 2012 Advanced Micro Devices, Inc., Robert Richter
  *
  * Perf-events backend for OProfile.
  */
@@ -25,7 +26,7 @@ static int oprofile_perf_enabled;
 static DEFINE_MUTEX(oprofile_perf_mutex);
 
 static struct op_counter_config *counter_config;
-static struct perf_event **perf_events[NR_CPUS];
+static DEFINE_PER_CPU(struct perf_event **, perf_events);
 static int num_counters;
 
 /*
@@ -38,7 +39,7 @@ static void op_overflow_handler(struct perf_event *event,
 	u32 cpu = smp_processor_id();
 
 	for (id = 0; id < num_counters; ++id)
-		if (perf_events[cpu][id] == event)
+		if (per_cpu(perf_events, cpu)[id] == event)
 			break;
 
 	if (id != num_counters)
@@ -74,7 +75,7 @@ static int op_create_counter(int cpu, int event)
 {
 	struct perf_event *pevent;
 
-	if (!counter_config[event].enabled || perf_events[cpu][event])
+	if (!counter_config[event].enabled || per_cpu(perf_events, cpu)[event])
 		return 0;
 
 	pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
@@ -91,18 +92,18 @@ static int op_create_counter(int cpu, int event)
 		return -EBUSY;
 	}
 
-	perf_events[cpu][event] = pevent;
+	per_cpu(perf_events, cpu)[event] = pevent;
 
 	return 0;
 }
 
 static void op_destroy_counter(int cpu, int event)
 {
-	struct perf_event *pevent = perf_events[cpu][event];
+	struct perf_event *pevent = per_cpu(perf_events, cpu)[event];
 
 	if (pevent) {
 		perf_event_release_kernel(pevent);
-		perf_events[cpu][event] = NULL;
+		per_cpu(perf_events, cpu)[event] = NULL;
 	}
 }
 
@@ -257,12 +258,12 @@ void oprofile_perf_exit(void)
 
 	for_each_possible_cpu(cpu) {
 		for (id = 0; id < num_counters; ++id) {
-			event = perf_events[cpu][id];
+			event = per_cpu(perf_events, cpu)[id];
 			if (event)
 				perf_event_release_kernel(event);
 		}
 
-		kfree(perf_events[cpu]);
+		kfree(per_cpu(perf_events, cpu));
 	}
 
 	kfree(counter_config);
@@ -277,8 +278,6 @@ int __init oprofile_perf_init(struct oprofile_operations *ops)
 	if (ret)
 		return ret;
 
-	memset(&perf_events, 0, sizeof(perf_events));
-
 	num_counters = perf_num_counters();
 	if (num_counters <= 0) {
 		pr_info("oprofile: no performance counters\n");
@@ -298,9 +297,9 @@ int __init oprofile_perf_init(struct oprofile_operations *ops)
 	}
 
 	for_each_possible_cpu(cpu) {
-		perf_events[cpu] = kcalloc(num_counters,
+		per_cpu(perf_events, cpu) = kcalloc(num_counters,
 				sizeof(struct perf_event *), GFP_KERNEL);
-		if (!perf_events[cpu]) {
+		if (!per_cpu(perf_events, cpu)) {
 			pr_info("oprofile: failed to allocate %d perf events "
 					"for cpu %d\n", num_counters, cpu);
 			ret = -ENOMEM;
-- 
cgit v1.2.3


From d9b0cde91c60da0ed5f92cdc3ac878142e6b5f27 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 29 May 2012 14:31:23 -0700
Subject: x86-64, gcc: Use -mpreferred-stack-boundary=3 if supported

On x86-64, the standard ABI requires alignment to 16 bytes.  However,
this is not actually necessary in the kernel (we don't do SSE except
in very controlled ways); and furthermore, the standard kernel entry
on x86-64 actually leaves the stack on an odd 8-byte boundary, which
means that gcc will generate extra instructions to keep the stack
*mis*aligned!

gcc 4.8 adds an -mpreferred-stack-boundary=3 option to override this
and lets us save some stack space and a handful of instructions.

Note that this causes us to pass -mno-sse twice; this is redundant,
but necessary since the cc-option test will fail unless -mno-sse is
passed on the same command line.

[ hpa: rewrote the patch description ]

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Link: http://lkml.kernel.org/r/CAMe9rOqPfy3JcZRLaUeCjBe9BVY-P6e0uaSbMi5hvS-6WwQueg@mail.gmail.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1f2521434554..b0c5276861ec 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -49,6 +49,9 @@ else
         KBUILD_AFLAGS += -m64
         KBUILD_CFLAGS += -m64
 
+	# Use -mpreferred-stack-boundary=3 if supported.
+	KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
+
         # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
         cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
         cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
-- 
cgit v1.2.3


From 357398e96d8c883b010379a7669df43ed0e2e32b Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 20 Jun 2012 18:39:27 +0200
Subject: perf/x86: Fix section mismatch in uncore_pci_init()

Fix section mismatch in uncore_pci_init():

 WARNING: vmlinux.o(.init.text+0x9246): Section mismatch in reference from the function uncore_pci_init() to the function .devexit.text:uncore_pci_remove()
 The function __init uncore_pci_init() references
 a function __devexit uncore_pci_remove().
 [...]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: <a.p.zijlstra@chello.nl>
Cc: <zheng.z.yan@intel.com>
Link: http://lkml.kernel.org/r/20120620163927.GI5046@erda.amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 28a8413ca199..6f43f9584e3d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -1313,7 +1313,7 @@ static int __devinit uncore_pci_add(struct intel_uncore_type *type,
 	return 0;
 }
 
-static void __devexit uncore_pci_remove(struct pci_dev *pdev)
+static void uncore_pci_remove(struct pci_dev *pdev)
 {
 	struct intel_uncore_box *box = pci_get_drvdata(pdev);
 	struct intel_uncore_pmu *pmu = box->pmu;
-- 
cgit v1.2.3


From 11cab711f686893f2696a061dfca30454a624784 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Fri, 22 Jun 2012 08:12:12 -0500
Subject: x86/uv: Fix the UV BAU destination timeout period

Correct the calculation of a destination timeout period, which
is used to distinguish between a destination timeout and the
situation where all the target software ack resources are full
and a request is returned immediately.

The problem is that integer arithmetic was overflowing, yielding
a very large result.

Without this fix destination timeouts are identified as resource
'plugged' events and an ipi method of resource releasing is
unnecessarily employed.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Link: http://lkml.kernel.org/r/20120622131212.GA31884@sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/uv/tlb_uv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 59880afa851f..0c48d438cbba 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1811,8 +1811,8 @@ static int calculate_destination_timeout(void)
 		index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
 		mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
 		mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
-		base = timeout_base_ns[index];
-		ts_ns = base * mult1 * mult2;
+		ts_ns = timeout_base_ns[index];
+		ts_ns *= (mult1 * mult2);
 		ret = ts_ns / 1000;
 	} else {
 		/* 4 bits  0/1 for 10/80us base, 3 bits of multiplier */
-- 
cgit v1.2.3


From 26ef85770c765bb8b6b6922f8a413872dd8e3979 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Fri, 22 Jun 2012 08:13:30 -0500
Subject: x86/uv: Implement UV BAU runtime enable and disable control via
 /proc/sgi_uv/

This patch enables the BAU to be turned on or off dynamically.

  echo "on"  > /proc/sgi_uv/ptc_statistics
  echo "off" > /proc/sgi_uv/ptc_statistics

The system may be booted with or without the nobau option.

Whether the system currently has the BAU off can be seen in
the /proc file -- normally with the baustats script.
Each cpu will have a 1 in the bauoff field if the BAU was turned
off, so baustats will give a count of cpus that have it off.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Link: http://lkml.kernel.org/r/20120622131330.GB31884@sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/uv/uv_bau.h |  2 ++
 arch/x86/platform/uv/tlb_uv.c    | 76 ++++++++++++++++++++++++++++++++--------
 2 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 6149b476d9df..847c00b721b2 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -520,6 +520,7 @@ struct ptc_stats {
 	unsigned long	s_uv2_wars;		/* uv2 workaround, perm. busy */
 	unsigned long	s_uv2_wars_hw;		/* uv2 workaround, hiwater */
 	unsigned long	s_uv2_war_waits;	/* uv2 workaround, long waits */
+	unsigned long   s_enters;		/* entries to the driver */
 	/* destination statistics */
 	unsigned long	d_alltlb;		/* times all tlb's on this
 						   cpu were flushed */
@@ -586,6 +587,7 @@ struct bau_control {
 	int			timeout_tries;
 	int			ipi_attempts;
 	int			conseccompletes;
+	short			nobau;
 	int			baudisabled;
 	int			set_bau_off;
 	short			cpu;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 0c48d438cbba..1492170cbb5a 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -38,6 +38,7 @@ static int timeout_base_ns[] = {
 
 static int timeout_us;
 static int nobau;
+static int nobau_perm;
 static int baudisabled;
 static spinlock_t disable_lock;
 static cycles_t congested_cycles;
@@ -120,6 +121,40 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
 static DEFINE_PER_CPU(struct bau_control, bau_control);
 static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
 
+static void
+set_bau_on(void)
+{
+	int cpu;
+	struct bau_control *bcp;
+
+	if (nobau_perm) {
+		pr_info("BAU not initialized; cannot be turned on\n");
+		return;
+	}
+	nobau = 0;
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->nobau = 0;
+	}
+	pr_info("BAU turned on\n");
+	return;
+}
+
+static void
+set_bau_off(void)
+{
+	int cpu;
+	struct bau_control *bcp;
+
+	nobau = 1;
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->nobau = 1;
+	}
+	pr_info("BAU turned off\n");
+	return;
+}
+
 /*
  * Determine the first node on a uvhub. 'Nodes' are used for kernel
  * memory allocation.
@@ -1079,12 +1114,12 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
 
-	/* kernel was booted 'nobau' */
-	if (nobau)
-		return cpumask;
-
 	bcp = &per_cpu(bau_control, cpu);
 	stat = bcp->statp;
+	stat->s_enters++;
+
+	if (bcp->nobau)
+		return cpumask;
 
 	/* bau was disabled due to slow response */
 	if (bcp->baudisabled) {
@@ -1338,29 +1373,32 @@ static inline unsigned long long usec_2_cycles(unsigned long microsec)
 static int ptc_seq_show(struct seq_file *file, void *data)
 {
 	struct ptc_stats *stat;
+	struct bau_control *bcp;
 	int cpu;
 
 	cpu = *(loff_t *)data;
 	if (!cpu) {
 		seq_printf(file,
-			"# cpu sent stime self locals remotes ncpus localhub ");
+		"# cpu bauoff sent stime self locals remotes ncpus localhub ");
 		seq_printf(file,
 			"remotehub numuvhubs numuvhubs16 numuvhubs8 ");
 		seq_printf(file,
 		    "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok ");
 		seq_printf(file,
-			"resetp resett giveup sto bz throt swack recv rtime ");
+			"resetp resett giveup sto bz throt enters swack recv rtime ");
 		seq_printf(file,
 			"all one mult none retry canc nocan reset rcan ");
 		seq_printf(file,
 			"disable enable wars warshw warwaits\n");
 	}
 	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
-		stat = &per_cpu(ptcstats, cpu);
+		bcp = &per_cpu(bau_control, cpu);
+		stat = bcp->statp;
 		/* source side statistics */
 		seq_printf(file,
-			"cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
-			   cpu, stat->s_requestor, cycles_2_us(stat->s_time),
+			   "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			   cpu, bcp->nobau, stat->s_requestor,
+			   cycles_2_us(stat->s_time),
 			   stat->s_ntargself, stat->s_ntarglocals,
 			   stat->s_ntargremotes, stat->s_ntargcpu,
 			   stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
@@ -1369,11 +1407,11 @@ static int ptc_seq_show(struct seq_file *file, void *data)
 			   stat->s_ntarguvhub8, stat->s_ntarguvhub4,
 			   stat->s_ntarguvhub2, stat->s_ntarguvhub1,
 			   stat->s_dtimeout, stat->s_strongnacks);
-		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
+		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld ",
 			   stat->s_retry_messages, stat->s_retriesok,
 			   stat->s_resets_plug, stat->s_resets_timeout,
 			   stat->s_giveup, stat->s_stimeout,
-			   stat->s_busy, stat->s_throttles);
+			   stat->s_busy, stat->s_throttles, stat->s_enters);
 
 		/* destination side statistics */
 		seq_printf(file,
@@ -1438,6 +1476,14 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
 		return -EFAULT;
 	optstr[count - 1] = '\0';
 
+	if (!strcmp(optstr, "on")) {
+		set_bau_on();
+		return count;
+	} else if (!strcmp(optstr, "off")) {
+		set_bau_off();
+		return count;
+	}
+
 	if (strict_strtol(optstr, 10, &input_arg) < 0) {
 		printk(KERN_DEBUG "%s is invalid\n", optstr);
 		return -EINVAL;
@@ -1836,6 +1882,8 @@ static void __init init_per_cpu_tunables(void)
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
 		bcp->baudisabled		= 0;
+		if (nobau)
+			bcp->nobau		= 1;
 		bcp->statp			= &per_cpu(ptcstats, cpu);
 		/* time interval to catch a hardware stay-busy bug */
 		bcp->timeout_interval		= usec_2_cycles(2*timeout_us);
@@ -2069,9 +2117,6 @@ static int __init uv_bau_init(void)
 	if (!is_uv_system())
 		return 0;
 
-	if (nobau)
-		return 0;
-
 	for_each_possible_cpu(cur_cpu) {
 		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
 		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
@@ -2091,7 +2136,8 @@ static int __init uv_bau_init(void)
 	enable_timeouts();
 
 	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
-		nobau = 1;
+		set_bau_off();
+		nobau_perm = 1;
 		return 0;
 	}
 
-- 
cgit v1.2.3


From 8b6e511e51f7e540c8e71022318ee4cc9a4567a7 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Fri, 22 Jun 2012 08:14:59 -0500
Subject: x86/uv: Work around UV2 BAU hangs

On SGI's UV2 the BAU (Broadcast Assist Unit) driver can hang
under a heavy load. To cure this:

- Disable the UV2 extended status mode (see UV2_EXT_SHFT), as
  this mode changes BAU behavior in more ways then just delivering
  an extra bit of status.  Revert status to just two meaningful bits,
  like UV1.

- Use no IPI-style resets on UV2.  Just give up the request for
  whatever the reason it failed and let it be accomplished with
  the legacy IPI method.

- Use no alternate sending descriptor (the former UV2 workaround
  bcp->using_desc and handle_uv2_busy() stuff).  Just disable the
  use of the BAU for a period of time in favor of the legacy IPI
  method when the h/w bug leaves a descriptor busy.

  -- new tunable: giveup_limit determines the threshold at which a hub is
     so plugged that it should do all requests with the legacy IPI method for a
     period of time
  -- generalize disable_for_congestion() (renamed disable_for_period()) for
     use whenever a hub should avoid using the BAU for a period of time

Also:

 - Fix find_another_by_swack(), which is part of the UV2 bug workaround

 - Correct and clarify the statistics (new stats s_overipilimit, s_giveuplimit,
   s_enters, s_ipifordisabled, s_plugged, s_congested)

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Link: http://lkml.kernel.org/r/20120622131459.GC31884@sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/uv/uv_bau.h |  28 ++-
 arch/x86/platform/uv/tlb_uv.c    | 387 ++++++++++++++++++---------------------
 2 files changed, 200 insertions(+), 215 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 847c00b721b2..a06983cdc125 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -140,6 +140,9 @@
 #define IPI_RESET_LIMIT			1
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD		5
+/* after this # of giveups (fall back to kernel IPI's) disable the use of
+   the BAU for a period of time */
+#define GIVEUP_LIMIT			100
 
 #define UV_LB_SUBNODEID			0x10
 
@@ -166,7 +169,6 @@
 #define FLUSH_RETRY_TIMEOUT		2
 #define FLUSH_GIVEUP			3
 #define FLUSH_COMPLETE			4
-#define FLUSH_RETRY_BUSYBUG		5
 
 /*
  * tuning the action when the numalink network is extremely delayed
@@ -175,7 +177,7 @@
 						   microseconds */
 #define CONGESTED_REPS			10	/* long delays averaged over
 						   this many broadcasts */
-#define CONGESTED_PERIOD		30	/* time for the bau to be
+#define DISABLED_PERIOD			10	/* time for the bau to be
 						   disabled, in seconds */
 /* see msg_type: */
 #define MSG_NOOP			0
@@ -520,7 +522,12 @@ struct ptc_stats {
 	unsigned long	s_uv2_wars;		/* uv2 workaround, perm. busy */
 	unsigned long	s_uv2_wars_hw;		/* uv2 workaround, hiwater */
 	unsigned long	s_uv2_war_waits;	/* uv2 workaround, long waits */
-	unsigned long   s_enters;		/* entries to the driver */
+	unsigned long	s_overipilimit;		/* over the ipi reset limit */
+	unsigned long	s_giveuplimit;		/* disables, over giveup limit*/
+	unsigned long	s_enters;		/* entries to the driver */
+	unsigned long	s_ipifordisabled;	/* fall back to IPI; disabled */
+	unsigned long	s_plugged;		/* plugged by h/w bug*/
+	unsigned long	s_congested;		/* giveup on long wait */
 	/* destination statistics */
 	unsigned long	d_alltlb;		/* times all tlb's on this
 						   cpu were flushed */
@@ -588,8 +595,7 @@ struct bau_control {
 	int			ipi_attempts;
 	int			conseccompletes;
 	short			nobau;
-	int			baudisabled;
-	int			set_bau_off;
+	short			baudisabled;
 	short			cpu;
 	short			osnode;
 	short			uvhub_cpu;
@@ -598,14 +604,16 @@ struct bau_control {
 	short			cpus_in_socket;
 	short			cpus_in_uvhub;
 	short			partition_base_pnode;
-	short			using_desc; /* an index, like uvhub_cpu */
-	unsigned int		inuse_map;
+	short			busy;       /* all were busy (war) */
 	unsigned short		message_number;
 	unsigned short		uvhub_quiesce;
 	short			socket_acknowledge_count[DEST_Q_SIZE];
 	cycles_t		send_message;
+	cycles_t		period_end;
+	cycles_t		period_time;
 	spinlock_t		uvhub_lock;
 	spinlock_t		queue_lock;
+	spinlock_t		disable_lock;
 	/* tunables */
 	int			max_concurr;
 	int			max_concurr_const;
@@ -616,9 +624,9 @@ struct bau_control {
 	int			complete_threshold;
 	int			cong_response_us;
 	int			cong_reps;
-	int			cong_period;
-	unsigned long		clocks_per_100_usec;
-	cycles_t		period_time;
+	cycles_t		disabled_period;
+	int			period_giveups;
+	int			giveup_limit;
 	long			period_requests;
 	struct hub_and_pnode	*thp;
 };
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 1492170cbb5a..71b5d5a07d7b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1,7 +1,7 @@
 /*
  *	SGI UltraViolet TLB flush routines.
  *
- *	(c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI.
+ *	(c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI.
  *
  *	This code is released under the GNU General Public License version 2 or
  *	later.
@@ -39,8 +39,6 @@ static int timeout_base_ns[] = {
 static int timeout_us;
 static int nobau;
 static int nobau_perm;
-static int baudisabled;
-static spinlock_t disable_lock;
 static cycles_t congested_cycles;
 
 /* tunables: */
@@ -48,12 +46,13 @@ static int max_concurr		= MAX_BAU_CONCURRENT;
 static int max_concurr_const	= MAX_BAU_CONCURRENT;
 static int plugged_delay	= PLUGGED_DELAY;
 static int plugsb4reset		= PLUGSB4RESET;
+static int giveup_limit		= GIVEUP_LIMIT;
 static int timeoutsb4reset	= TIMEOUTSB4RESET;
 static int ipi_reset_limit	= IPI_RESET_LIMIT;
 static int complete_threshold	= COMPLETE_THRESHOLD;
 static int congested_respns_us	= CONGESTED_RESPONSE_US;
 static int congested_reps	= CONGESTED_REPS;
-static int congested_period	= CONGESTED_PERIOD;
+static int disabled_period	= DISABLED_PERIOD;
 
 static struct tunables tunables[] = {
 	{&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
@@ -64,7 +63,8 @@ static struct tunables tunables[] = {
 	{&complete_threshold, COMPLETE_THRESHOLD},
 	{&congested_respns_us, CONGESTED_RESPONSE_US},
 	{&congested_reps, CONGESTED_REPS},
-	{&congested_period, CONGESTED_PERIOD}
+	{&disabled_period, DISABLED_PERIOD},
+	{&giveup_limit, GIVEUP_LIMIT}
 };
 
 static struct dentry *tunables_dir;
@@ -313,7 +313,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
 		 * Both sockets dump their completed count total into
 		 * the message's count.
 		 */
-		smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
+		*sp = 0;
 		asp = (struct atomic_short *)&msg->acknowledge_count;
 		msg_ack_count = atom_asr(socket_ack_count, asp);
 
@@ -526,16 +526,15 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
 }
 
 /*
- * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
+ * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
+ * But not currently used.
  */
 static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
 {
 	unsigned long descriptor_status;
-	unsigned long descriptor_status2;
 
-	descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
-	descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
-	descriptor_status = (descriptor_status << 1) | descriptor_status2;
+	descriptor_status =
+		((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
 	return descriptor_status;
 }
 
@@ -566,87 +565,11 @@ int normal_busy(struct bau_control *bcp)
  */
 int handle_uv2_busy(struct bau_control *bcp)
 {
-	int busy_one = bcp->using_desc;
-	int normal = bcp->uvhub_cpu;
-	int selected = -1;
-	int i;
-	unsigned long descriptor_status;
-	unsigned long status;
-	int mmr_offset;
-	struct bau_desc *bau_desc_old;
-	struct bau_desc *bau_desc_new;
-	struct bau_control *hmaster = bcp->uvhub_master;
 	struct ptc_stats *stat = bcp->statp;
-	cycles_t ttm;
 
 	stat->s_uv2_wars++;
-	spin_lock(&hmaster->uvhub_lock);
-	/* try for the original first */
-	if (busy_one != normal) {
-		if (!normal_busy(bcp))
-			selected = normal;
-	}
-	if (selected < 0) {
-		/* can't use the normal, select an alternate */
-		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
-		descriptor_status = read_lmmr(mmr_offset);
-
-		/* scan available descriptors 32-63 */
-		for (i = 0; i < UV_CPUS_PER_AS; i++) {
-			if ((hmaster->inuse_map & (1 << i)) == 0) {
-				status = ((descriptor_status >>
-						(i * UV_ACT_STATUS_SIZE)) &
-						UV_ACT_STATUS_MASK) << 1;
-				if (status != UV2H_DESC_BUSY) {
-					selected = i + UV_CPUS_PER_AS;
-					break;
-				}
-			}
-		}
-	}
-
-	if (busy_one != normal)
-		/* mark the busy alternate as not in-use */
-		hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
-
-	if (selected >= 0) {
-		/* switch to the selected descriptor */
-		if (selected != normal) {
-			/* set the selected alternate as in-use */
-			hmaster->inuse_map |=
-					(1 << (selected - UV_CPUS_PER_AS));
-			if (selected > stat->s_uv2_wars_hw)
-				stat->s_uv2_wars_hw = selected;
-		}
-		bau_desc_old = bcp->descriptor_base;
-		bau_desc_old += (ITEMS_PER_DESC * busy_one);
-		bcp->using_desc = selected;
-		bau_desc_new = bcp->descriptor_base;
-		bau_desc_new += (ITEMS_PER_DESC * selected);
-		*bau_desc_new = *bau_desc_old;
-	} else {
-		/*
-		 * All are busy. Wait for the normal one for this cpu to
-		 * free up.
-		 */
-		stat->s_uv2_war_waits++;
-		spin_unlock(&hmaster->uvhub_lock);
-		ttm = get_cycles();
-		do {
-			cpu_relax();
-		} while (normal_busy(bcp));
-		spin_lock(&hmaster->uvhub_lock);
-		/* switch to the original descriptor */
-		bcp->using_desc = normal;
-		bau_desc_old = bcp->descriptor_base;
-		bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc);
-		bcp->using_desc = (ITEMS_PER_DESC * normal);
-		bau_desc_new = bcp->descriptor_base;
-		bau_desc_new += (ITEMS_PER_DESC * normal);
-		*bau_desc_new = *bau_desc_old; /* copy the entire descriptor */
-	}
-	spin_unlock(&hmaster->uvhub_lock);
-	return FLUSH_RETRY_BUSYBUG;
+	bcp->busy = 1;
+	return FLUSH_GIVEUP;
 }
 
 static int uv2_wait_completion(struct bau_desc *bau_desc,
@@ -655,7 +578,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 {
 	unsigned long descriptor_stat;
 	cycles_t ttm;
-	int desc = bcp->using_desc;
+	int desc = bcp->uvhub_cpu;
 	long busy_reps = 0;
 	struct ptc_stats *stat = bcp->statp;
 
@@ -663,24 +586,38 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 
 	/* spin on the status MMR, waiting for it to go idle */
 	while (descriptor_stat != UV2H_DESC_IDLE) {
-		/*
-		 * Our software ack messages may be blocked because
-		 * there are no swack resources available.  As long
-		 * as none of them has timed out hardware will NACK
-		 * our message and its state will stay IDLE.
-		 */
-		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
-		    (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
+		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) {
+			/*
+			 * A h/w bug on the destination side may
+			 * have prevented the message being marked
+			 * pending, thus it doesn't get replied to
+			 * and gets continually nacked until it times
+			 * out with a SOURCE_TIMEOUT.
+			 */
 			stat->s_stimeout++;
 			return FLUSH_GIVEUP;
-		} else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) {
-			stat->s_strongnacks++;
-			bcp->conseccompletes = 0;
-			return FLUSH_GIVEUP;
 		} else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
+			ttm = get_cycles();
+
+			/*
+			 * Our retries may be blocked by all destination
+			 * swack resources being consumed, and a timeout
+			 * pending.  In that case hardware returns the
+			 * ERROR that looks like a destination timeout.
+			 * Without using the extended status we have to
+			 * deduce from the short time that this was a
+			 * strong nack.
+			 */
+			if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
+				bcp->conseccompletes = 0;
+				stat->s_plugged++;
+				/* FLUSH_RETRY_PLUGGED causes hang on boot */
+				return FLUSH_GIVEUP;
+			}
 			stat->s_dtimeout++;
 			bcp->conseccompletes = 0;
-			return FLUSH_RETRY_TIMEOUT;
+			/* FLUSH_RETRY_TIMEOUT causes hang on boot */
+			return FLUSH_GIVEUP;
 		} else {
 			busy_reps++;
 			if (busy_reps > 1000000) {
@@ -688,9 +625,8 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 				busy_reps = 0;
 				ttm = get_cycles();
 				if ((ttm - bcp->send_message) >
-					(bcp->clocks_per_100_usec)) {
+						bcp->timeout_interval)
 					return handle_uv2_busy(bcp);
-				}
 			}
 			/*
 			 * descriptor_stat is still BUSY
@@ -714,7 +650,7 @@ static int wait_completion(struct bau_desc *bau_desc,
 {
 	int right_shift;
 	unsigned long mmr_offset;
-	int desc = bcp->using_desc;
+	int desc = bcp->uvhub_cpu;
 
 	if (desc < UV_CPUS_PER_AS) {
 		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
@@ -793,33 +729,31 @@ static void destination_timeout(struct bau_desc *bau_desc,
 }
 
 /*
- * Completions are taking a very long time due to a congested numalink
- * network.
+ * Stop all cpus on a uvhub from using the BAU for a period of time.
+ * This is reversed by check_enable.
  */
-static void disable_for_congestion(struct bau_control *bcp,
-					struct ptc_stats *stat)
+static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
 {
-	/* let only one cpu do this disabling */
-	spin_lock(&disable_lock);
-
-	if (!baudisabled && bcp->period_requests &&
-	    ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
-		int tcpu;
-		struct bau_control *tbcp;
-		/* it becomes this cpu's job to turn on the use of the
-		   BAU again */
-		baudisabled = 1;
-		bcp->set_bau_off = 1;
-		bcp->set_bau_on_time = get_cycles();
-		bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period);
+	int tcpu;
+	struct bau_control *tbcp;
+	struct bau_control *hmaster;
+	cycles_t tm1;
+
+	hmaster = bcp->uvhub_master;
+	spin_lock(&hmaster->disable_lock);
+	if (!bcp->baudisabled) {
 		stat->s_bau_disabled++;
+		tm1 = get_cycles();
 		for_each_present_cpu(tcpu) {
 			tbcp = &per_cpu(bau_control, tcpu);
-			tbcp->baudisabled = 1;
+			if (tbcp->uvhub_master == hmaster) {
+				tbcp->baudisabled = 1;
+				tbcp->set_bau_on_time =
+					tm1 + bcp->disabled_period;
+			}
 		}
 	}
-
-	spin_unlock(&disable_lock);
+	spin_unlock(&hmaster->disable_lock);
 }
 
 static void count_max_concurr(int stat, struct bau_control *bcp,
@@ -850,16 +784,30 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
 			bcp->period_requests++;
 			bcp->period_time += elapsed;
 			if ((elapsed > congested_cycles) &&
-			    (bcp->period_requests > bcp->cong_reps))
-				disable_for_congestion(bcp, stat);
+			    (bcp->period_requests > bcp->cong_reps) &&
+			    ((bcp->period_time / bcp->period_requests) >
+							congested_cycles)) {
+				stat->s_congested++;
+				disable_for_period(bcp, stat);
+			}
 		}
 	} else
 		stat->s_requestor--;
 
 	if (completion_status == FLUSH_COMPLETE && try > 1)
 		stat->s_retriesok++;
-	else if (completion_status == FLUSH_GIVEUP)
+	else if (completion_status == FLUSH_GIVEUP) {
 		stat->s_giveup++;
+		if (get_cycles() > bcp->period_end)
+			bcp->period_giveups = 0;
+		bcp->period_giveups++;
+		if (bcp->period_giveups == 1)
+			bcp->period_end = get_cycles() + bcp->disabled_period;
+		if (bcp->period_giveups > bcp->giveup_limit) {
+			disable_for_period(bcp, stat);
+			stat->s_giveuplimit++;
+		}
+	}
 }
 
 /*
@@ -903,7 +851,8 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
+int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
+	struct bau_desc *bau_desc)
 {
 	int seq_number = 0;
 	int completion_stat = 0;
@@ -916,24 +865,23 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 	struct bau_control *hmaster = bcp->uvhub_master;
 	struct uv1_bau_msg_header *uv1_hdr = NULL;
 	struct uv2_bau_msg_header *uv2_hdr = NULL;
-	struct bau_desc *bau_desc;
 
-	if (bcp->uvhub_version == 1)
+	if (bcp->uvhub_version == 1) {
+		uv1 = 1;
 		uv1_throttle(hmaster, stat);
+	}
 
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
 
 	time1 = get_cycles();
+	if (uv1)
+		uv1_hdr = &bau_desc->header.uv1_hdr;
+	else
+		uv2_hdr = &bau_desc->header.uv2_hdr;
+
 	do {
-		bau_desc = bcp->descriptor_base;
-		bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
-		if (bcp->uvhub_version == 1) {
-			uv1 = 1;
-			uv1_hdr = &bau_desc->header.uv1_hdr;
-		} else
-			uv2_hdr = &bau_desc->header.uv2_hdr;
-		if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
+		if (try == 0) {
 			if (uv1)
 				uv1_hdr->msg_type = MSG_REGULAR;
 			else
@@ -951,25 +899,24 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 			uv1_hdr->sequence = seq_number;
 		else
 			uv2_hdr->sequence = seq_number;
-		index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
+		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
 		bcp->send_message = get_cycles();
 
 		write_mmr_activation(index);
 
 		try++;
 		completion_stat = wait_completion(bau_desc, bcp, try);
-		/* UV2: wait_completion() may change the bcp->using_desc */
 
 		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
 
 		if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
 			bcp->ipi_attempts = 0;
+			stat->s_overipilimit++;
 			completion_stat = FLUSH_GIVEUP;
 			break;
 		}
 		cpu_relax();
 	} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
-		 (completion_stat == FLUSH_RETRY_BUSYBUG) ||
 		 (completion_stat == FLUSH_RETRY_TIMEOUT));
 
 	time2 = get_cycles();
@@ -990,28 +937,33 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 }
 
 /*
- * The BAU is disabled. When the disabled time period has expired, the cpu
- * that disabled it must re-enable it.
- * Return 0 if it is re-enabled for all cpus.
+ * The BAU is disabled for this uvhub. When the disabled time period has
+ * expired re-enable it.
+ * Return 0 if it is re-enabled for all cpus on this uvhub.
  */
 static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
 {
 	int tcpu;
 	struct bau_control *tbcp;
+	struct bau_control *hmaster;
 
-	if (bcp->set_bau_off) {
-		if (get_cycles() >= bcp->set_bau_on_time) {
-			stat->s_bau_reenabled++;
-			baudisabled = 0;
-			for_each_present_cpu(tcpu) {
-				tbcp = &per_cpu(bau_control, tcpu);
+	hmaster = bcp->uvhub_master;
+	spin_lock(&hmaster->disable_lock);
+	if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
+		stat->s_bau_reenabled++;
+		for_each_present_cpu(tcpu) {
+			tbcp = &per_cpu(bau_control, tcpu);
+			if (tbcp->uvhub_master == hmaster) {
 				tbcp->baudisabled = 0;
 				tbcp->period_requests = 0;
 				tbcp->period_time = 0;
+				tbcp->period_giveups = 0;
 			}
-			return 0;
 		}
+		spin_unlock(&hmaster->disable_lock);
+		return 0;
 	}
+	spin_unlock(&hmaster->disable_lock);
 	return -1;
 }
 
@@ -1113,6 +1065,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
+	unsigned long descriptor_status;
+	unsigned long status;
 
 	bcp = &per_cpu(bau_control, cpu);
 	stat = bcp->statp;
@@ -1121,10 +1075,22 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	if (bcp->nobau)
 		return cpumask;
 
+	if (bcp->busy) {
+		descriptor_status =
+			read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0);
+		status = ((descriptor_status >> (bcp->uvhub_cpu *
+			UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1;
+		if (status == UV2H_DESC_BUSY)
+			return cpumask;
+		bcp->busy = 0;
+	}
+
 	/* bau was disabled due to slow response */
 	if (bcp->baudisabled) {
-		if (check_enable(bcp, stat))
+		if (check_enable(bcp, stat)) {
+			stat->s_ipifordisabled++;
 			return cpumask;
+		}
 	}
 
 	/*
@@ -1140,7 +1106,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 		stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
-	bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
+	bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
 		return NULL;
@@ -1153,25 +1119,27 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
 	 * or 1 if it gave up and the original cpumask should be returned.
 	 */
-	if (!uv_flush_send_and_wait(flush_mask, bcp))
+	if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))
 		return NULL;
 	else
 		return cpumask;
 }
 
 /*
- * Search the message queue for any 'other' message with the same software
- * acknowledge resource bit vector.
+ * Search the message queue for any 'other' unprocessed message with the
+ * same software acknowledge resource bit vector as the 'msg' message.
  */
 struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
-			struct bau_control *bcp, unsigned char swack_vec)
+					   struct bau_control *bcp)
 {
 	struct bau_pq_entry *msg_next = msg + 1;
+	unsigned char swack_vec = msg->swack_vec;
 
 	if (msg_next > bcp->queue_last)
 		msg_next = bcp->queue_first;
-	while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
-		if (msg_next->swack_vec == swack_vec)
+	while (msg_next != msg) {
+		if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) &&
+				(msg_next->swack_vec == swack_vec))
 			return msg_next;
 		msg_next++;
 		if (msg_next > bcp->queue_last)
@@ -1200,32 +1168,30 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
 		 * This message was assigned a swack resource, but no
 		 * reserved acknowlegment is pending.
 		 * The bug has prevented this message from setting the MMR.
-		 * And no other message has used the same sw_ack resource.
-		 * Do the requested shootdown but do not reply to the msg.
-		 * (the 0 means make no acknowledge)
 		 */
-		bau_process_message(mdp, bcp, 0);
-		return;
-	}
-
-	/*
-	 * Some message has set the MMR 'pending' bit; it might have been
-	 * another message.  Look for that message.
-	 */
-	other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
-	if (other_msg) {
-		/* There is another.  Do not ack the current one. */
-		bau_process_message(mdp, bcp, 0);
 		/*
-		 * Let the natural processing of that message acknowledge
-		 * it. Don't get the processing of sw_ack's out of order.
+		 * Some message has set the MMR 'pending' bit; it might have
+		 * been another message.  Look for that message.
 		 */
-		return;
+		other_msg = find_another_by_swack(msg, bcp);
+		if (other_msg) {
+			/*
+			 * There is another. Process this one but do not
+			 * ack it.
+			 */
+			bau_process_message(mdp, bcp, 0);
+			/*
+			 * Let the natural processing of that other message
+			 * acknowledge it. Don't get the processing of sw_ack's
+			 * out of order.
+			 */
+			return;
+		}
 	}
 
 	/*
-	 * There is no other message using this sw_ack, so it is safe to
-	 * acknowledge it.
+	 * Either the MMR shows this one pending a reply or there is no
+	 * other message using this sw_ack, so it is safe to acknowledge it.
 	 */
 	bau_process_message(mdp, bcp, 1);
 
@@ -1330,7 +1296,8 @@ static void __init enable_timeouts(void)
 		 */
 		mmr_image |= (1L << SOFTACK_MSHIFT);
 		if (is_uv2_hub()) {
-			mmr_image |= (1L << UV2_EXT_SHFT);
+			/* hw bug workaround; do not use extended status */
+			mmr_image &= ~(1L << UV2_EXT_SHFT);
 		}
 		write_mmr_misc_control(pnode, mmr_image);
 	}
@@ -1379,24 +1346,26 @@ static int ptc_seq_show(struct seq_file *file, void *data)
 	cpu = *(loff_t *)data;
 	if (!cpu) {
 		seq_printf(file,
-		"# cpu bauoff sent stime self locals remotes ncpus localhub ");
+		 "# cpu bauoff sent stime self locals remotes ncpus localhub ");
 		seq_printf(file,
 			"remotehub numuvhubs numuvhubs16 numuvhubs8 ");
 		seq_printf(file,
-		    "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok ");
+			"numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries ");
+		seq_printf(file,
+			"rok resetp resett giveup sto bz throt disable ");
 		seq_printf(file,
-			"resetp resett giveup sto bz throt enters swack recv rtime ");
+			"enable wars warshw warwaits enters ipidis plugged ");
 		seq_printf(file,
-			"all one mult none retry canc nocan reset rcan ");
+			"ipiover glim cong swack recv rtime all one mult ");
 		seq_printf(file,
-			"disable enable wars warshw warwaits\n");
+			"none retry canc nocan reset rcan\n");
 	}
 	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
 		bcp = &per_cpu(bau_control, cpu);
 		stat = bcp->statp;
 		/* source side statistics */
 		seq_printf(file,
-			   "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			"cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
 			   cpu, bcp->nobau, stat->s_requestor,
 			   cycles_2_us(stat->s_time),
 			   stat->s_ntargself, stat->s_ntarglocals,
@@ -1407,25 +1376,28 @@ static int ptc_seq_show(struct seq_file *file, void *data)
 			   stat->s_ntarguvhub8, stat->s_ntarguvhub4,
 			   stat->s_ntarguvhub2, stat->s_ntarguvhub1,
 			   stat->s_dtimeout, stat->s_strongnacks);
-		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
 			   stat->s_retry_messages, stat->s_retriesok,
 			   stat->s_resets_plug, stat->s_resets_timeout,
 			   stat->s_giveup, stat->s_stimeout,
-			   stat->s_busy, stat->s_throttles, stat->s_enters);
+			   stat->s_busy, stat->s_throttles);
+		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			   stat->s_bau_disabled, stat->s_bau_reenabled,
+			   stat->s_uv2_wars, stat->s_uv2_wars_hw,
+			   stat->s_uv2_war_waits, stat->s_enters,
+			   stat->s_ipifordisabled, stat->s_plugged,
+			   stat->s_overipilimit, stat->s_giveuplimit,
+			   stat->s_congested);
 
 		/* destination side statistics */
 		seq_printf(file,
-			   "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
 			   read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
 			   stat->d_requestee, cycles_2_us(stat->d_time),
 			   stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
 			   stat->d_nomsg, stat->d_retries, stat->d_canceled,
 			   stat->d_nocanceled, stat->d_resets,
 			   stat->d_rcanceled);
-		seq_printf(file, "%ld %ld %ld %ld %ld\n",
-			stat->s_bau_disabled, stat->s_bau_reenabled,
-			stat->s_uv2_wars, stat->s_uv2_wars_hw,
-			stat->s_uv2_war_waits);
 	}
 	return 0;
 }
@@ -1439,13 +1411,14 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf,
 	char *buf;
 	int ret;
 
-	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
-		"max_concur plugged_delay plugsb4reset",
-		"timeoutsb4reset ipi_reset_limit complete_threshold",
-		"congested_response_us congested_reps congested_period",
+	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n",
+		"max_concur plugged_delay plugsb4reset timeoutsb4reset",
+		"ipi_reset_limit complete_threshold congested_response_us",
+		"congested_reps disabled_period giveup_limit",
 		max_concurr, plugged_delay, plugsb4reset,
 		timeoutsb4reset, ipi_reset_limit, complete_threshold,
-		congested_respns_us, congested_reps, congested_period);
+		congested_respns_us, congested_reps, disabled_period,
+		giveup_limit);
 
 	if (!buf)
 		return -ENOMEM;
@@ -1616,7 +1589,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user,
 		bcp->complete_threshold =	complete_threshold;
 		bcp->cong_response_us =		congested_respns_us;
 		bcp->cong_reps =		congested_reps;
-		bcp->cong_period =		congested_period;
+		bcp->disabled_period =		sec_2_cycles(disabled_period);
+		bcp->giveup_limit =		giveup_limit;
 	}
 	return count;
 }
@@ -1745,6 +1719,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
 			 *   fairness chaining multilevel count replied_to
 			 */
 		} else {
+			/*
+			 * BIOS uses legacy mode, but UV2 hardware always
+			 * uses native mode for selective broadcasts.
+			 */
 			uv2_hdr = &bd2->header.uv2_hdr;
 			uv2_hdr->swack_flag =	1;
 			uv2_hdr->base_dest_nasid =
@@ -1896,10 +1874,11 @@ static void __init init_per_cpu_tunables(void)
 		bcp->complete_threshold		= complete_threshold;
 		bcp->cong_response_us		= congested_respns_us;
 		bcp->cong_reps			= congested_reps;
-		bcp->cong_period		= congested_period;
-		bcp->clocks_per_100_usec =	usec_2_cycles(100);
+		bcp->disabled_period =		sec_2_cycles(disabled_period);
+		bcp->giveup_limit =		giveup_limit;
 		spin_lock_init(&bcp->queue_lock);
 		spin_lock_init(&bcp->uvhub_lock);
+		spin_lock_init(&bcp->disable_lock);
 	}
 }
 
@@ -2020,7 +1999,6 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
 		}
 		bcp->uvhub_master = *hmasterp;
 		bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
-		bcp->using_desc = bcp->uvhub_cpu;
 		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
 			printk(KERN_EMERG "%d cpus per uvhub invalid\n",
 				bcp->uvhub_cpu);
@@ -2123,7 +2101,6 @@ static int __init uv_bau_init(void)
 	}
 
 	nuvhubs = uv_num_possible_blades();
-	spin_lock_init(&disable_lock);
 	congested_cycles = usec_2_cycles(congested_respns_us);
 
 	uv_base_pnode = 0x7fffffff;
-- 
cgit v1.2.3


From 35a468732289372aab506d7cf73f98f0379888ae Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Thu, 21 Jun 2012 17:52:52 +0900
Subject: perf evsel: Fix a build failure on cross compilation

The commit c410431cefefd ("perf tools: Reconstruct event with modifiers
from perf_event_attr") added the line, but it's broken since it needs to
go up 3 directories to get to the kernel root directory, not 2.

However host gcc contains /usr/local/include in its search path, so that
it can find the perf_event.h in /usr/include. This why we didn't notice
the problem yet.  But when I tried to cross compile it appears like:

      CC util/evsel.o
  util/evsel.c:18:44: error: ../../include/linux/perf_event.h: No such file or directory
  make: *** [util/evsel.o] Error 1

Looking at the source, it isn't needed at all as evsel.h already
included the perf_event.h. So simply remove it would solve the problem.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340268772-5737-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 876f639d69ed..3d1f6968f175 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -15,7 +15,6 @@
 #include "cpumap.h"
 #include "thread_map.h"
 #include "target.h"
-#include "../../include/linux/perf_event.h"
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
-- 
cgit v1.2.3


From 7a25b2d32b9cb0b813d56ee6109acf90f3c9f1e5 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 21 Jun 2012 12:25:16 +0200
Subject: perf test: Fix parse events test to follow proper raw event name

Following commit changed raw event names to carry event modificator.

  perf evsel: Reconstruct raw event with modifiers from perf_event_attr
  commit 6eef3d9c2bcf52b7a3c18e609f5838c007b989a4
  Author: Arnaldo Carvalho de Melo <acme@redhat.com>

The perf_evsel__name function now returns ':mod' suffix for raw events,
so we need to follow that in current tests.

All tests pass now for 'perf test parse' suite.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340274316-5161-1-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events-test.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c
index 229af6da33a2..a0f61a2a6835 100644
--- a/tools/perf/util/parse-events-test.c
+++ b/tools/perf/util/parse-events-test.c
@@ -413,19 +413,20 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel;
 
-	/* cpu/config=1,name=krava1/u */
+	/* cpu/config=1,name=krava/u */
 	evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
 	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
 	TEST_ASSERT_VAL("wrong config",  1 == evsel->attr.config);
 	TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava"));
 
-	/* cpu/config=2/" */
+	/* cpu/config=2/u" */
 	evsel = list_entry(evsel->node.next, struct perf_evsel, node);
 	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
 	TEST_ASSERT_VAL("wrong config",  2 == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "raw 0x2"));
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "raw 0x2:u"));
 
 	return 0;
 }
-- 
cgit v1.2.3


From 4069d6f86bebce1a1e3456ef721511b4b81958f8 Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Sat, 9 Jun 2012 13:18:08 +0900
Subject: sony-laptop: use an enum for SNC event types

Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/sony-laptop.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 210d4ae547c2..615ab06b6cd3 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -1172,6 +1172,10 @@ static int sony_nc_hotkeys_decode(u32 event, unsigned int handle)
 /*
  * ACPI callbacks
  */
+enum event_types {
+	HOTKEY = 1,
+	KILLSWITCH
+};
 static void sony_nc_notify(struct acpi_device *device, u32 event)
 {
 	u32 real_ev = event;
@@ -1196,7 +1200,7 @@ static void sony_nc_notify(struct acpi_device *device, u32 event)
 		/* hotkey event */
 		case 0x0100:
 		case 0x0127:
-			ev_type = 1;
+			ev_type = HOTKEY;
 			real_ev = sony_nc_hotkeys_decode(event, handle);
 
 			if (real_ev > 0)
@@ -1216,7 +1220,7 @@ static void sony_nc_notify(struct acpi_device *device, u32 event)
 			 * update the rfkill device status when the
 			 * switch is moved.
 			 */
-			ev_type = 2;
+			ev_type = KILLSWITCH;
 			sony_call_snc_handle(handle, 0x0100, &result);
 			real_ev = result & 0x03;
 
@@ -1238,7 +1242,7 @@ static void sony_nc_notify(struct acpi_device *device, u32 event)
 
 	} else {
 		/* old style event */
-		ev_type = 1;
+		ev_type = HOTKEY;
 		sony_laptop_report_input_event(real_ev);
 	}
 
-- 
cgit v1.2.3


From bb384b5295323ed58260aeaff22d8bbe32988396 Mon Sep 17 00:00:00 2001
From: Marco Chiappero <marco@absence.it>
Date: Sat, 9 Jun 2012 13:18:09 +0900
Subject: sony-laptop: notify userspace of GFX switch position changes

Some Vaios come with both integrated and discrete graphics, plus a
switch for choosing one of the two. When the switch position is changed,
a notification is generated.

Signed-off-by: Marco Chiappero <marco@absence.it>
Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/sony-laptop.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 615ab06b6cd3..4f42e5661bf5 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -1174,7 +1174,8 @@ static int sony_nc_hotkeys_decode(u32 event, unsigned int handle)
  */
 enum event_types {
 	HOTKEY = 1,
-	KILLSWITCH
+	KILLSWITCH,
+	GFX_SWITCH
 };
 static void sony_nc_notify(struct acpi_device *device, u32 event)
 {
@@ -1230,6 +1231,24 @@ static void sony_nc_notify(struct acpi_device *device, u32 event)
 
 			break;
 
+		case 0x0128:
+		case 0x0146:
+			/* Hybrid GFX switching */
+			sony_call_snc_handle(handle, 0x0000, &result);
+			dprintk("GFX switch event received (reason: %s)\n",
+					(result & 0x01) ?
+					"switch change" : "unknown");
+
+			/* verify the switch state
+			 * 1: discrete GFX
+			 * 0: integrated GFX
+			 */
+			sony_call_snc_handle(handle, 0x0100, &result);
+
+			ev_type = GFX_SWITCH;
+			real_ev = result & 0xff;
+			break;
+
 		default:
 			dprintk("Unknown event 0x%x for handle 0x%x\n",
 					event, handle);
-- 
cgit v1.2.3


From 15aa5c75468a103cdee1a0e0ec26aad979bf71a5 Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Mon, 11 Jun 2012 07:18:31 +0900
Subject: sony-laptop: store battery care limits on batteries

Some models offer the option to store the limits on the battery
(firmware?).

Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/sony-laptop.c | 41 +++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 4f42e5661bf5..f045e3e59cd6 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -1916,32 +1916,33 @@ static ssize_t sony_nc_battery_care_limit_store(struct device *dev,
 	 *  bits 4,5: store the limit into the EC
 	 *  bits 6,7: store the limit into the battery
 	 */
+	cmd = 0;
 
-	/*
-	 * handle 0x0115 should allow storing on battery too;
-	 * handle 0x0136 same as 0x0115 + health status;
-	 * handle 0x013f, same as 0x0136 but no storing on the battery
-	 *
-	 * Store only inside the EC for now, regardless the handle number
-	 */
-	if (value == 0)
-		/* disable limits */
-		cmd = 0x0;
+	if (value > 0) {
+		if (value <= 50)
+			cmd = 0x20;
 
-	else if (value <= 50)
-		cmd = 0x21;
+		else if (value <= 80)
+			cmd = 0x10;
 
-	else if (value <= 80)
-		cmd = 0x11;
+		else if (value <= 100)
+			cmd = 0x30;
+
+		else
+			return -EINVAL;
 
-	else if (value <= 100)
-		cmd = 0x31;
+		/*
+		 * handle 0x0115 should allow storing on battery too;
+		 * handle 0x0136 same as 0x0115 + health status;
+		 * handle 0x013f, same as 0x0136 but no storing on the battery
+		 */
+		if (bcare_ctl->handle != 0x013f)
+			cmd = cmd | (cmd << 2);
 
-	else
-		return -EINVAL;
+		cmd = (cmd | 0x1) << 0x10;
+	}
 
-	if (sony_call_snc_handle(bcare_ctl->handle, (cmd << 0x10) | 0x0100,
-				&result))
+	if (sony_call_snc_handle(bcare_ctl->handle, cmd | 0x0100, &result))
 		return -EIO;
 
 	return count;
-- 
cgit v1.2.3


From 014fc8fbece33d42e2aa92d289fffa213a159321 Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Sat, 9 Jun 2012 13:18:11 +0900
Subject: sony-laptop: add lid backlight support for handle 0x143

Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/sony-laptop.c | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index f045e3e59cd6..a94e13fe1f42 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -1010,6 +1010,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev,
 struct sony_backlight_props {
 	struct backlight_device *dev;
 	int			handle;
+	int			cmd_base;
 	u8			offset;
 	u8			maxlvl;
 };
@@ -1037,7 +1038,7 @@ static int sony_nc_get_brightness_ng(struct backlight_device *bd)
 	struct sony_backlight_props *sdev =
 		(struct sony_backlight_props *)bl_get_data(bd);
 
-	sony_call_snc_handle(sdev->handle, 0x0200, &result);
+	sony_call_snc_handle(sdev->handle, sdev->cmd_base + 0x100, &result);
 
 	return (result & 0xff) - sdev->offset;
 }
@@ -1049,7 +1050,8 @@ static int sony_nc_update_status_ng(struct backlight_device *bd)
 		(struct sony_backlight_props *)bl_get_data(bd);
 
 	value = bd->props.brightness + sdev->offset;
-	if (sony_call_snc_handle(sdev->handle, 0x0100 | (value << 16), &result))
+	if (sony_call_snc_handle(sdev->handle, sdev->cmd_base | (value << 0x10),
+				&result))
 		return -EIO;
 
 	return value;
@@ -2496,6 +2498,7 @@ static void sony_nc_backlight_ng_read_limits(int handle,
 {
 	u64 offset;
 	int i;
+	int lvl_table_len = 0;
 	u8 min = 0xff, max = 0x00;
 	unsigned char buffer[32] = { 0 };
 
@@ -2515,11 +2518,21 @@ static void sony_nc_backlight_ng_read_limits(int handle,
 	if (i < 0)
 		return;
 
+	switch (handle) {
+	case 0x012f:
+	case 0x0137:
+		lvl_table_len = 9;
+		break;
+	case 0x143:
+		lvl_table_len = 16;
+		break;
+	}
+
 	/* the buffer lists brightness levels available, brightness levels are
 	 * from position 0 to 8 in the array, other values are used by ALS
 	 * control.
 	 */
-	for (i = 0; i < 9 && i < ARRAY_SIZE(buffer); i++) {
+	for (i = 0; i < lvl_table_len && i < ARRAY_SIZE(buffer); i++) {
 
 		dprintk("Brightness level: %d\n", buffer[i]);
 
@@ -2546,14 +2559,22 @@ static void sony_nc_backlight_setup(void)
 
 	if (sony_find_snc_handle(0x12f) != -1) {
 		ops = &sony_backlight_ng_ops;
+		sony_bl_props.cmd_base = 0x0100;
 		sony_nc_backlight_ng_read_limits(0x12f, &sony_bl_props);
 		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset;
 
 	} else if (sony_find_snc_handle(0x137) != -1) {
 		ops = &sony_backlight_ng_ops;
+		sony_bl_props.cmd_base = 0x0100;
 		sony_nc_backlight_ng_read_limits(0x137, &sony_bl_props);
 		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset;
 
+	} else if (sony_find_snc_handle(0x143) != -1) {
+		ops = &sony_backlight_ng_ops;
+		sony_bl_props.cmd_base = 0x3000;
+		sony_nc_backlight_ng_read_limits(0x143, &sony_bl_props);
+		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset;
+
 	} else if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "GBRT",
 						&unused))) {
 		ops = &sony_backlight_ops;
-- 
cgit v1.2.3


From ca3c2c706de39b3400e57254dce054bf7350efa2 Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Sat, 9 Jun 2012 13:18:12 +0900
Subject: sony-laptop: input initialization should be done before SNC

SNC needs input devices so better have those ready before starting
handle events.

Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/sony-laptop.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index a94e13fe1f42..89ff6d845f34 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -2642,6 +2642,12 @@ static int sony_nc_add(struct acpi_device *device)
 		}
 	}
 
+	result = sony_laptop_setup_input(device);
+	if (result) {
+		pr_err("Unable to create input devices\n");
+		goto outplatform;
+	}
+
 	if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "ECON",
 					 &handle))) {
 		int arg = 1;
@@ -2659,12 +2665,6 @@ static int sony_nc_add(struct acpi_device *device)
 	}
 
 	/* setup input devices and helper fifo */
-	result = sony_laptop_setup_input(device);
-	if (result) {
-		pr_err("Unable to create input devices\n");
-		goto outsnc;
-	}
-
 	if (acpi_video_backlight_support()) {
 		pr_info("brightness ignored, must be controlled by ACPI video driver\n");
 	} else {
@@ -2712,22 +2712,21 @@ static int sony_nc_add(struct acpi_device *device)
 
 	return 0;
 
-      out_sysfs:
+out_sysfs:
 	for (item = sony_nc_values; item->name; ++item) {
 		device_remove_file(&sony_pf_device->dev, &item->devattr);
 	}
 	sony_nc_backlight_cleanup();
-
-	sony_laptop_remove_input();
-
-      outsnc:
 	sony_nc_function_cleanup(sony_pf_device);
 	sony_nc_handles_cleanup(sony_pf_device);
 
-      outpresent:
+outplatform:
+	sony_laptop_remove_input();
+
+outpresent:
 	sony_pf_remove();
 
-      outwalk:
+outwalk:
 	sony_nc_rfkill_cleanup();
 	return result;
 }
-- 
cgit v1.2.3


From c7a2918373983b32db3ca35823d930641747e26f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 9 Jun 2012 13:18:13 +0900
Subject: sony-laptop: fix sony_nc_sysfs_store()

We made this an unsigned long and it causes a bug on 64 bit big endian
systems when we try to pass the value to sony_nc_int_call().

Also value has to be signed because validate() returns negative error
codes.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/sony-laptop.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 89ff6d845f34..78e6389d2cae 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -973,7 +973,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev,
 			       struct device_attribute *attr,
 			       const char *buffer, size_t count)
 {
-	unsigned long value = 0;
+	int value;
 	int ret = 0;
 	struct sony_nc_value *item =
 	    container_of(attr, struct sony_nc_value, devattr);
@@ -984,7 +984,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev,
 	if (count > 31)
 		return -EINVAL;
 
-	if (kstrtoul(buffer, 10, &value))
+	if (kstrtoint(buffer, 10, &value))
 		return -EINVAL;
 
 	if (item->validate)
@@ -994,7 +994,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev,
 		return value;
 
 	ret = sony_nc_int_call(sony_nc_acpi_handle, *item->acpiset,
-			(int *)&value, NULL);
+			       &value, NULL);
 	if (ret < 0)
 		return -EIO;
 
-- 
cgit v1.2.3


From 56f4a9f76d8ce7c7cef92905c909aad0c7e5c9db Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 9 Jun 2012 13:18:14 +0900
Subject: sony-laptop: fix a couple signedness bugs

This needs to be signed to handle negative error codes.
Remove a redundant check, read_limits is always called with a valid
handle.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/sony-laptop.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 78e6389d2cae..b9499b65c1cd 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -2139,7 +2139,7 @@ static ssize_t sony_nc_thermal_mode_show(struct device *dev,
 		struct device_attribute *attr, char *buffer)
 {
 	ssize_t count = 0;
-	unsigned int mode = sony_nc_thermal_mode_get();
+	int mode = sony_nc_thermal_mode_get();
 
 	if (mode < 0)
 		return mode;
@@ -2507,8 +2507,6 @@ static void sony_nc_backlight_ng_read_limits(int handle,
 	props->maxlvl = 0xff;
 
 	offset = sony_find_snc_handle(handle);
-	if (offset < 0)
-		return;
 
 	/* try to read the boundaries from ACPI tables, if we fail the above
 	 * defaults should be reasonable
-- 
cgit v1.2.3


From a1071a5abf1f4d4a7f8324e21b8b32b1342013c7 Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Thu, 14 Jun 2012 06:36:02 +0900
Subject: sony-laptop: correct find_snc_handle failure checks

Since bab7084c745bf4d75b760728387f375fd34dc683, find_snc_handle
returns -EINVAL, not -1.

Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/sony-laptop.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index b9499b65c1cd..d456ff0c73b7 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -2555,19 +2555,19 @@ static void sony_nc_backlight_setup(void)
 	const struct backlight_ops *ops = NULL;
 	struct backlight_properties props;
 
-	if (sony_find_snc_handle(0x12f) != -1) {
+	if (sony_find_snc_handle(0x12f) >= 0) {
 		ops = &sony_backlight_ng_ops;
 		sony_bl_props.cmd_base = 0x0100;
 		sony_nc_backlight_ng_read_limits(0x12f, &sony_bl_props);
 		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset;
 
-	} else if (sony_find_snc_handle(0x137) != -1) {
+	} else if (sony_find_snc_handle(0x137) >= 0) {
 		ops = &sony_backlight_ng_ops;
 		sony_bl_props.cmd_base = 0x0100;
 		sony_nc_backlight_ng_read_limits(0x137, &sony_bl_props);
 		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset;
 
-	} else if (sony_find_snc_handle(0x143) != -1) {
+	} else if (sony_find_snc_handle(0x143) >= 0) {
 		ops = &sony_backlight_ng_ops;
 		sony_bl_props.cmd_base = 0x3000;
 		sony_nc_backlight_ng_read_limits(0x143, &sony_bl_props);
-- 
cgit v1.2.3


From 57f9616b79549e772cf4dd3aa1d2df5b6c8acdfa Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 12 Jun 2012 19:28:50 +0300
Subject: ideapad: uninitialized data in ideapad_acpi_add()

We only initialize the high bits of "cfg".  It probably doesn't cause
a problem given that this is platform specific code and doesn't have to
worry about endianness etc.  But it's sort of messy.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/ideapad-laptop.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 4f20f8dd3d7c..17f6dfd8dbfb 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -694,10 +694,10 @@ MODULE_DEVICE_TABLE(acpi, ideapad_device_ids);
 static int __devinit ideapad_acpi_add(struct acpi_device *adevice)
 {
 	int ret, i;
-	unsigned long cfg;
+	int cfg;
 	struct ideapad_private *priv;
 
-	if (read_method_int(adevice->handle, "_CFG", (int *)&cfg))
+	if (read_method_int(adevice->handle, "_CFG", &cfg))
 		return -ENODEV;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -721,7 +721,7 @@ static int __devinit ideapad_acpi_add(struct acpi_device *adevice)
 		goto input_failed;
 
 	for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) {
-		if (test_bit(ideapad_rfk_data[i].cfgbit, &cfg))
+		if (test_bit(ideapad_rfk_data[i].cfgbit, &priv->cfg))
 			ideapad_register_rfkill(adevice, i);
 		else
 			priv->rfk[i] = NULL;
-- 
cgit v1.2.3


From 88ca518b0bb4161e5f20f8a1d9cc477cae294e54 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 25 Jun 2012 15:07:17 +0200
Subject: intel_ips: blacklist HP ProBook laptops

intel_ips driver spews the warning message
  "ME failed to update for more than 1s, likely hung"
at each second endlessly on HP ProBook laptops with IronLake.

As this has never worked, better to blacklist the driver for now.

Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Matthew Garrett <mjg@redhat.com>
---
 drivers/platform/x86/intel_ips.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index 0ffdb3cde2bb..9af4257d4901 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -72,6 +72,7 @@
 #include <linux/string.h>
 #include <linux/tick.h>
 #include <linux/timer.h>
+#include <linux/dmi.h>
 #include <drm/i915_drm.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
@@ -1485,6 +1486,24 @@ static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = {
 
 MODULE_DEVICE_TABLE(pci, ips_id_table);
 
+static int ips_blacklist_callback(const struct dmi_system_id *id)
+{
+	pr_info("Blacklisted intel_ips for %s\n", id->ident);
+	return 1;
+}
+
+static const struct dmi_system_id ips_blacklist[] = {
+	{
+		.callback = ips_blacklist_callback,
+		.ident = "HP ProBook",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"),
+		},
+	},
+	{ }	/* terminating entry */
+};
+
 static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	u64 platform_info;
@@ -1494,6 +1513,9 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	u16 htshi, trc, trc_required_mask;
 	u8 tse;
 
+	if (dmi_check_system(ips_blacklist))
+		return -ENODEV;
+
 	ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL);
 	if (!ips)
 		return -ENOMEM;
-- 
cgit v1.2.3


From da3789628f88684d3f0fb4e6a6bc086c395ac3cb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 27 Jun 2012 13:08:42 -0300
Subject: perf tools: Stop using a global trace events description list

The pevent thing is per perf.data file, so I made it stop being static
and become a perf_session member, so tools processing perf.data files
use perf_session and _there_ we read the trace events description into
session->pevent and then change everywhere to stop using that single
global pevent variable and use the per session one.

Note that it _doesn't_ fall backs to trace__event_id, as we're not
interested at all in what is present in the
/sys/kernel/debug/tracing/events in the workstation doing the analysis,
just in what is in the perf.data file.

This patch also introduces perf_session__set_tracepoints_handlers that
is the perf perf.data/session way to associate handlers to tracepoint
events by resolving their IDs using the events descriptions stored in a
perf.data file. Make 'perf sched' use it.

Reported-by: Dmitry Antipov <dmitry.antipov@linaro.org>
Tested-by: Dmitry Antipov <dmitry.antipov@linaro.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linaro-dev@lists.linaro.org
Cc: patches@linaro.org
Link: http://lkml.kernel.org/r/20120625232016.GA28525@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c                          | 37 ++++++---
 tools/perf/builtin-lock.c                          |  4 +-
 tools/perf/builtin-sched.c                         | 36 +++++---
 tools/perf/builtin-script.c                        | 66 ++++++++++-----
 tools/perf/util/evlist.c                           |  4 +-
 tools/perf/util/evlist.h                           |  3 +
 tools/perf/util/header.c                           | 31 +++----
 .../perf/util/scripting-engines/trace-event-perl.c | 28 ++++---
 .../util/scripting-engines/trace-event-python.c    | 21 +++--
 tools/perf/util/session.c                          | 56 +++++++++++++
 tools/perf/util/session.h                          | 10 +++
 tools/perf/util/trace-event-parse.c                | 58 ++++++-------
 tools/perf/util/trace-event-read.c                 | 97 +++++++++++-----------
 tools/perf/util/trace-event-scripting.c            |  7 +-
 tools/perf/util/trace-event.h                      | 38 +++++----
 15 files changed, 314 insertions(+), 182 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 547af48deb4f..ce35015f2dc6 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -57,6 +57,11 @@ static unsigned long nr_allocs, nr_cross_allocs;
 
 #define PATH_SYS_NODE	"/sys/devices/system/node"
 
+struct perf_kmem {
+	struct perf_tool    tool;
+	struct perf_session *session;
+};
+
 static void init_cpunode_map(void)
 {
 	FILE *fp;
@@ -278,14 +283,16 @@ static void process_free_event(void *data,
 	s_alloc->alloc_cpu = -1;
 }
 
-static void process_raw_event(union perf_event *raw_event __used, void *data,
+static void process_raw_event(struct perf_tool *tool,
+			      union perf_event *raw_event __used, void *data,
 			      int cpu, u64 timestamp, struct thread *thread)
 {
+	struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool);
 	struct event_format *event;
 	int type;
 
-	type = trace_parse_common_type(data);
-	event = trace_find_event(type);
+	type = trace_parse_common_type(kmem->session->pevent, data);
+	event = pevent_find_event(kmem->session->pevent, type);
 
 	if (!strcmp(event->name, "kmalloc") ||
 	    !strcmp(event->name, "kmem_cache_alloc")) {
@@ -306,7 +313,7 @@ static void process_raw_event(union perf_event *raw_event __used, void *data,
 	}
 }
 
-static int process_sample_event(struct perf_tool *tool __used,
+static int process_sample_event(struct perf_tool *tool,
 				union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel __used,
@@ -322,16 +329,18 @@ static int process_sample_event(struct perf_tool *tool __used,
 
 	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-	process_raw_event(event, sample->raw_data, sample->cpu,
+	process_raw_event(tool, event, sample->raw_data, sample->cpu,
 			  sample->time, thread);
 
 	return 0;
 }
 
-static struct perf_tool perf_kmem = {
-	.sample			= process_sample_event,
-	.comm			= perf_event__process_comm,
-	.ordered_samples	= true,
+static struct perf_kmem perf_kmem = {
+	.tool = {
+		.sample			= process_sample_event,
+		.comm			= perf_event__process_comm,
+		.ordered_samples	= true,
+	},
 };
 
 static double fragmentation(unsigned long n_req, unsigned long n_alloc)
@@ -486,11 +495,15 @@ static void sort_result(void)
 static int __cmd_kmem(void)
 {
 	int err = -EINVAL;
-	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
-							 0, false, &perf_kmem);
+	struct perf_session *session;
+
+	session = perf_session__new(input_name, O_RDONLY, 0, false,
+				    &perf_kmem.tool);
 	if (session == NULL)
 		return -ENOMEM;
 
+	perf_kmem.session = session;
+
 	if (perf_session__create_kernel_maps(session) < 0)
 		goto out_delete;
 
@@ -498,7 +511,7 @@ static int __cmd_kmem(void)
 		goto out_delete;
 
 	setup_pager();
-	err = perf_session__process_events(session, &perf_kmem);
+	err = perf_session__process_events(session, &perf_kmem.tool);
 	if (err != 0)
 		goto out_delete;
 	sort_result();
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index fd53319de20d..b3c428548868 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -724,8 +724,8 @@ process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread)
 	struct event_format *event;
 	int type;
 
-	type = trace_parse_common_type(data);
-	event = trace_find_event(type);
+	type = trace_parse_common_type(session->pevent, data);
+	event = pevent_find_event(session->pevent, type);
 
 	if (!strcmp(event->name, "lock_acquire"))
 		process_lock_acquire_event(data, event, cpu, timestamp, thread);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 9fe77b185338..7a9ad2b1ee76 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -43,6 +43,11 @@ static u64			sleep_measurement_overhead;
 
 static unsigned long		nr_tasks;
 
+struct perf_sched {
+	struct perf_tool    tool;
+	struct perf_session *session;
+};
+
 struct sched_atom;
 
 struct task_desc {
@@ -1597,6 +1602,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
 						 struct perf_evsel *evsel,
 						 struct machine *machine)
 {
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+	struct pevent *pevent = sched->session->pevent;
 	struct thread *thread = machine__findnew_thread(machine, sample->pid);
 
 	if (thread == NULL) {
@@ -1612,7 +1619,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
 		tracepoint_handler f = evsel->handler.func;
 
 		if (evsel->handler.data == NULL)
-			evsel->handler.data = trace_find_event(evsel->attr.config);
+			evsel->handler.data = pevent_find_event(pevent,
+							  evsel->attr.config);
 
 		f(tool, evsel->handler.data, sample, machine, thread);
 	}
@@ -1620,12 +1628,14 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
 	return 0;
 }
 
-static struct perf_tool perf_sched = {
-	.sample			= perf_sched__process_tracepoint_sample,
-	.comm			= perf_event__process_comm,
-	.lost			= perf_event__process_lost,
-	.fork			= perf_event__process_task,
-	.ordered_samples	= true,
+static struct perf_sched perf_sched = {
+	.tool = {
+		.sample		 = perf_sched__process_tracepoint_sample,
+		.comm		 = perf_event__process_comm,
+		.lost		 = perf_event__process_lost,
+		.fork		 = perf_event__process_task,
+		.ordered_samples = true,
+	},
 };
 
 static void read_events(bool destroy, struct perf_session **psession)
@@ -1640,16 +1650,20 @@ static void read_events(bool destroy, struct perf_session **psession)
 		{ "sched:sched_process_exit", process_sched_exit_event, },
 		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
 	};
-	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
-							 0, false, &perf_sched);
+	struct perf_session *session;
+
+	session = perf_session__new(input_name, O_RDONLY, 0, false,
+				    &perf_sched.tool);
 	if (session == NULL)
 		die("No Memory");
 
-	err = perf_evlist__set_tracepoints_handlers_array(session->evlist, handlers);
+	perf_sched.session = session;
+
+	err = perf_session__set_tracepoints_handlers(session, handlers);
 	assert(err == 0);
 
 	if (perf_session__has_traces(session, "record -R")) {
-		err = perf_session__process_events(session, &perf_sched);
+		err = perf_session__process_events(session, &perf_sched.tool);
 		if (err)
 			die("Failed to process events, error %d", err);
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 8fecd3b8130a..1e60ab70b2b1 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -28,6 +28,11 @@ static bool			system_wide;
 static const char		*cpu_list;
 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 
+struct perf_script {
+	struct perf_tool    tool;
+	struct perf_session *session;
+};
+
 enum perf_output_field {
 	PERF_OUTPUT_COMM            = 1U << 0,
 	PERF_OUTPUT_TID             = 1U << 1,
@@ -257,7 +262,8 @@ static int perf_session__check_output_opt(struct perf_session *session)
 	return 0;
 }
 
-static void print_sample_start(struct perf_sample *sample,
+static void print_sample_start(struct pevent *pevent,
+			       struct perf_sample *sample,
 			       struct thread *thread,
 			       struct perf_evsel *evsel)
 {
@@ -302,8 +308,14 @@ static void print_sample_start(struct perf_sample *sample,
 
 	if (PRINT_FIELD(EVNAME)) {
 		if (attr->type == PERF_TYPE_TRACEPOINT) {
-			type = trace_parse_common_type(sample->raw_data);
-			event = trace_find_event(type);
+			/*
+			 * XXX Do we really need this here?
+			 * perf_evlist__set_tracepoint_names should have done
+			 * this already
+			 */
+			type = trace_parse_common_type(pevent,
+						       sample->raw_data);
+			event = pevent_find_event(pevent, type);
 			if (event)
 				evname = event->name;
 		} else
@@ -404,6 +416,7 @@ static void print_sample_bts(union perf_event *event,
 }
 
 static void process_event(union perf_event *event __unused,
+			  struct pevent *pevent,
 			  struct perf_sample *sample,
 			  struct perf_evsel *evsel,
 			  struct machine *machine,
@@ -414,7 +427,7 @@ static void process_event(union perf_event *event __unused,
 	if (output[attr->type].fields == 0)
 		return;
 
-	print_sample_start(sample, thread, evsel);
+	print_sample_start(pevent, sample, thread, evsel);
 
 	if (is_bts_event(attr)) {
 		print_sample_bts(event, sample, evsel, machine, thread);
@@ -422,7 +435,7 @@ static void process_event(union perf_event *event __unused,
 	}
 
 	if (PRINT_FIELD(TRACE))
-		print_trace_event(sample->cpu, sample->raw_data,
+		print_trace_event(pevent, sample->cpu, sample->raw_data,
 				  sample->raw_size);
 
 	if (PRINT_FIELD(ADDR))
@@ -453,7 +466,8 @@ static int default_stop_script(void)
 	return 0;
 }
 
-static int default_generate_script(const char *outfile __unused)
+static int default_generate_script(struct pevent *pevent __unused,
+				   const char *outfile __unused)
 {
 	return 0;
 }
@@ -491,6 +505,7 @@ static int process_sample_event(struct perf_tool *tool __used,
 				struct machine *machine)
 {
 	struct addr_location al;
+	struct perf_script *scr = container_of(tool, struct perf_script, tool);
 	struct thread *thread = machine__findnew_thread(machine, event->ip.tid);
 
 	if (thread == NULL) {
@@ -522,24 +537,27 @@ static int process_sample_event(struct perf_tool *tool __used,
 	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
 		return 0;
 
-	scripting_ops->process_event(event, sample, evsel, machine, thread);
+	scripting_ops->process_event(event, scr->session->pevent,
+				     sample, evsel, machine, thread);
 
 	evsel->hists.stats.total_period += sample->period;
 	return 0;
 }
 
-static struct perf_tool perf_script = {
-	.sample		 = process_sample_event,
-	.mmap		 = perf_event__process_mmap,
-	.comm		 = perf_event__process_comm,
-	.exit		 = perf_event__process_task,
-	.fork		 = perf_event__process_task,
-	.attr		 = perf_event__process_attr,
-	.event_type	 = perf_event__process_event_type,
-	.tracing_data	 = perf_event__process_tracing_data,
-	.build_id	 = perf_event__process_build_id,
-	.ordered_samples = true,
-	.ordering_requires_timestamps = true,
+static struct perf_script perf_script = {
+	.tool = {
+		.sample		 = process_sample_event,
+		.mmap		 = perf_event__process_mmap,
+		.comm		 = perf_event__process_comm,
+		.exit		 = perf_event__process_task,
+		.fork		 = perf_event__process_task,
+		.attr		 = perf_event__process_attr,
+		.event_type	 = perf_event__process_event_type,
+		.tracing_data	 = perf_event__process_tracing_data,
+		.build_id	 = perf_event__process_build_id,
+		.ordered_samples = true,
+		.ordering_requires_timestamps = true,
+	},
 };
 
 extern volatile int session_done;
@@ -555,7 +573,7 @@ static int __cmd_script(struct perf_session *session)
 
 	signal(SIGINT, sig_handler);
 
-	ret = perf_session__process_events(session, &perf_script);
+	ret = perf_session__process_events(session, &perf_script.tool);
 
 	if (debug_mode)
 		pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered);
@@ -1337,10 +1355,13 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
 	if (!script_name)
 		setup_pager();
 
-	session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_script);
+	session = perf_session__new(input_name, O_RDONLY, 0, false,
+				    &perf_script.tool);
 	if (session == NULL)
 		return -ENOMEM;
 
+	perf_script.session = session;
+
 	if (cpu_list) {
 		if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap))
 			return -1;
@@ -1386,7 +1407,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
 			return -1;
 		}
 
-		err = scripting_ops->generate_script("perf-script");
+		err = scripting_ops->generate_script(session->pevent,
+						     "perf-script");
 		goto out;
 	}
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 7400fb3fc50c..f74e9560350e 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -224,8 +224,8 @@ out_free_attrs:
 	return err;
 }
 
-static struct perf_evsel *
-	perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
 {
 	struct perf_evsel *evsel;
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 989bee9624c2..40d4d3cdced0 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -73,6 +73,9 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
 #define perf_evlist__set_tracepoints_handlers_array(evlist, array) \
 	perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array))
 
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id);
+
 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
 			 int cpu, int thread, u64 id);
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a5e2015319ee..5a47aba46759 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1474,15 +1474,15 @@ out:
 
 static int process_tracing_data(struct perf_file_section *section __unused,
 			      struct perf_header *ph __unused,
-			      int feat __unused, int fd)
+			      int feat __unused, int fd, void *data)
 {
-	trace_report(fd, false);
+	trace_report(fd, data, false);
 	return 0;
 }
 
 static int process_build_id(struct perf_file_section *section,
 			    struct perf_header *ph,
-			    int feat __unused, int fd)
+			    int feat __unused, int fd, void *data __used)
 {
 	if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
 		pr_debug("Failed to read buildids, continuing...\n");
@@ -1493,7 +1493,7 @@ struct feature_ops {
 	int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
 	void (*print)(struct perf_header *h, int fd, FILE *fp);
 	int (*process)(struct perf_file_section *section,
-		       struct perf_header *h, int feat, int fd);
+		       struct perf_header *h, int feat, int fd, void *data);
 	const char *name;
 	bool full_only;
 };
@@ -1988,7 +1988,7 @@ int perf_file_header__read(struct perf_file_header *header,
 
 static int perf_file_section__process(struct perf_file_section *section,
 				      struct perf_header *ph,
-				      int feat, int fd, void *data __used)
+				      int feat, int fd, void *data)
 {
 	if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
 		pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
@@ -2004,7 +2004,7 @@ static int perf_file_section__process(struct perf_file_section *section,
 	if (!feat_ops[feat].process)
 		return 0;
 
-	return feat_ops[feat].process(section, ph, feat, fd);
+	return feat_ops[feat].process(section, ph, feat, fd, data);
 }
 
 static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
@@ -2093,9 +2093,11 @@ static int read_attr(int fd, struct perf_header *ph,
 	return ret <= 0 ? -1 : 0;
 }
 
-static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel)
+static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel,
+					   struct pevent *pevent)
 {
-	struct event_format *event = trace_find_event(evsel->attr.config);
+	struct event_format *event = pevent_find_event(pevent,
+						       evsel->attr.config);
 	char bf[128];
 
 	if (event == NULL)
@@ -2109,13 +2111,14 @@ static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel)
 	return 0;
 }
 
-static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist)
+static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist,
+					     struct pevent *pevent)
 {
 	struct perf_evsel *pos;
 
 	list_for_each_entry(pos, &evlist->entries, node) {
 		if (pos->attr.type == PERF_TYPE_TRACEPOINT &&
-		    perf_evsel__set_tracepoint_name(pos))
+		    perf_evsel__set_tracepoint_name(pos, pevent))
 			return -1;
 	}
 
@@ -2198,12 +2201,12 @@ int perf_session__read_header(struct perf_session *session, int fd)
 		event_count =  f_header.event_types.size / sizeof(struct perf_trace_event_type);
 	}
 
-	perf_header__process_sections(header, fd, NULL,
+	perf_header__process_sections(header, fd, &session->pevent,
 				      perf_file_section__process);
 
 	lseek(fd, header->data_offset, SEEK_SET);
 
-	if (perf_evlist__set_tracepoint_names(session->evlist))
+	if (perf_evlist__set_tracepoint_names(session->evlist, session->pevent))
 		goto out_delete_evlist;
 
 	header->frozen = 1;
@@ -2419,8 +2422,8 @@ int perf_event__process_tracing_data(union perf_event *event,
 	lseek(session->fd, offset + sizeof(struct tracing_data_event),
 	      SEEK_SET);
 
-	size_read = trace_report(session->fd, session->repipe);
-
+	size_read = trace_report(session->fd, &session->pevent,
+				 session->repipe);
 	padding = ALIGN(size_read, sizeof(u64)) - size_read;
 
 	if (read(session->fd, buf, padding) < 0)
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 4c1b3d72a1d2..b3620fe12763 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -233,7 +233,8 @@ static void define_event_symbols(struct event_format *event,
 		define_event_symbols(event, ev_name, args->next);
 }
 
-static inline struct event_format *find_cache_event(int type)
+static inline
+struct event_format *find_cache_event(struct pevent *pevent, int type)
 {
 	static char ev_name[256];
 	struct event_format *event;
@@ -241,7 +242,7 @@ static inline struct event_format *find_cache_event(int type)
 	if (events[type])
 		return events[type];
 
-	events[type] = event = trace_find_event(type);
+	events[type] = event = pevent_find_event(pevent, type);
 	if (!event)
 		return NULL;
 
@@ -252,7 +253,8 @@ static inline struct event_format *find_cache_event(int type)
 	return event;
 }
 
-static void perl_process_tracepoint(union perf_event *pevent __unused,
+static void perl_process_tracepoint(union perf_event *perf_event __unused,
+				    struct pevent *pevent,
 				    struct perf_sample *sample,
 				    struct perf_evsel *evsel,
 				    struct machine *machine __unused,
@@ -275,13 +277,13 @@ static void perl_process_tracepoint(union perf_event *pevent __unused,
 	if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
 		return;
 
-	type = trace_parse_common_type(data);
+	type = trace_parse_common_type(pevent, data);
 
-	event = find_cache_event(type);
+	event = find_cache_event(pevent, type);
 	if (!event)
 		die("ug! no event found for type %d", type);
 
-	pid = trace_parse_common_pid(data);
+	pid = trace_parse_common_pid(pevent, data);
 
 	sprintf(handler, "%s::%s", event->system, event->name);
 
@@ -314,7 +316,8 @@ static void perl_process_tracepoint(union perf_event *pevent __unused,
 				offset = field->offset;
 			XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
 		} else { /* FIELD_IS_NUMERIC */
-			val = read_size(data + field->offset, field->size);
+			val = read_size(pevent, data + field->offset,
+					field->size);
 			if (field->flags & FIELD_IS_SIGNED) {
 				XPUSHs(sv_2mortal(newSViv(val)));
 			} else {
@@ -368,14 +371,15 @@ static void perl_process_event_generic(union perf_event *pevent __unused,
 	LEAVE;
 }
 
-static void perl_process_event(union perf_event *pevent,
+static void perl_process_event(union perf_event *event,
+			       struct pevent *pevent,
 			       struct perf_sample *sample,
 			       struct perf_evsel *evsel,
 			       struct machine *machine,
 			       struct thread *thread)
 {
-	perl_process_tracepoint(pevent, sample, evsel, machine, thread);
-	perl_process_event_generic(pevent, sample, evsel, machine, thread);
+	perl_process_tracepoint(event, pevent, sample, evsel, machine, thread);
+	perl_process_event_generic(event, sample, evsel, machine, thread);
 }
 
 static void run_start_sub(void)
@@ -448,7 +452,7 @@ static int perl_stop_script(void)
 	return 0;
 }
 
-static int perl_generate_script(const char *outfile)
+static int perl_generate_script(struct pevent *pevent, const char *outfile)
 {
 	struct event_format *event = NULL;
 	struct format_field *f;
@@ -495,7 +499,7 @@ static int perl_generate_script(const char *outfile)
 	fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
 	fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n");
 
-	while ((event = trace_find_next_event(event))) {
+	while ((event = trace_find_next_event(pevent, event))) {
 		fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
 		fprintf(ofp, "\tmy (");
 
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index acb9795286c4..a8ca2f8179a9 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -190,7 +190,8 @@ static void define_event_symbols(struct event_format *event,
 		define_event_symbols(event, ev_name, args->next);
 }
 
-static inline struct event_format *find_cache_event(int type)
+static inline
+struct event_format *find_cache_event(struct pevent *pevent, int type)
 {
 	static char ev_name[256];
 	struct event_format *event;
@@ -198,7 +199,7 @@ static inline struct event_format *find_cache_event(int type)
 	if (events[type])
 		return events[type];
 
-	events[type] = event = trace_find_event(type);
+	events[type] = event = pevent_find_event(pevent, type);
 	if (!event)
 		return NULL;
 
@@ -209,7 +210,8 @@ static inline struct event_format *find_cache_event(int type)
 	return event;
 }
 
-static void python_process_event(union perf_event *pevent __unused,
+static void python_process_event(union perf_event *perf_event __unused,
+				 struct pevent *pevent,
 				 struct perf_sample *sample,
 				 struct perf_evsel *evsel __unused,
 				 struct machine *machine __unused,
@@ -233,13 +235,13 @@ static void python_process_event(union perf_event *pevent __unused,
 	if (!t)
 		Py_FatalError("couldn't create Python tuple");
 
-	type = trace_parse_common_type(data);
+	type = trace_parse_common_type(pevent, data);
 
-	event = find_cache_event(type);
+	event = find_cache_event(pevent, type);
 	if (!event)
 		die("ug! no event found for type %d", type);
 
-	pid = trace_parse_common_pid(data);
+	pid = trace_parse_common_pid(pevent, data);
 
 	sprintf(handler_name, "%s__%s", event->system, event->name);
 
@@ -284,7 +286,8 @@ static void python_process_event(union perf_event *pevent __unused,
 				offset = field->offset;
 			obj = PyString_FromString((char *)data + offset);
 		} else { /* FIELD_IS_NUMERIC */
-			val = read_size(data + field->offset, field->size);
+			val = read_size(pevent, data + field->offset,
+					field->size);
 			if (field->flags & FIELD_IS_SIGNED) {
 				if ((long long)val >= LONG_MIN &&
 				    (long long)val <= LONG_MAX)
@@ -438,7 +441,7 @@ out:
 	return err;
 }
 
-static int python_generate_script(const char *outfile)
+static int python_generate_script(struct pevent *pevent, const char *outfile)
 {
 	struct event_format *event = NULL;
 	struct format_field *f;
@@ -487,7 +490,7 @@ static int python_generate_script(const char *outfile)
 	fprintf(ofp, "def trace_end():\n");
 	fprintf(ofp, "\tprint \"in trace_end\"\n\n");
 
-	while ((event = trace_find_next_event(event))) {
+	while ((event = trace_find_next_event(pevent, event))) {
 		fprintf(ofp, "def %s__%s(", event->system, event->name);
 		fprintf(ofp, "event_name, ");
 		fprintf(ofp, "context, ");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 6b305fbcc986..f5baff1495e6 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -14,6 +14,7 @@
 #include "sort.h"
 #include "util.h"
 #include "cpumap.h"
+#include "event-parse.h"
 
 static int perf_session__open(struct perf_session *self, bool force)
 {
@@ -1610,3 +1611,58 @@ void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
 	perf_header__fprintf_info(session, fp, full);
 	fprintf(fp, "# ========\n#\n");
 }
+
+
+int __perf_session__set_tracepoints_handlers(struct perf_session *session,
+					     const struct perf_evsel_str_handler *assocs,
+					     size_t nr_assocs)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct event_format *format;
+	struct perf_evsel *evsel;
+	char *tracepoint, *name;
+	size_t i;
+	int err;
+
+	for (i = 0; i < nr_assocs; i++) {
+		err = -ENOMEM;
+		tracepoint = strdup(assocs[i].name);
+		if (tracepoint == NULL)
+			goto out;
+
+		err = -ENOENT;
+		name = strchr(tracepoint, ':');
+		if (name == NULL)
+			goto out_free;
+
+		*name++ = '\0';
+		format = pevent_find_event_by_name(session->pevent,
+						   tracepoint, name);
+		if (format == NULL) {
+			/*
+			 * Adding a handler for an event not in the session,
+			 * just ignore it.
+			 */
+			goto next;
+		}
+
+		evsel = perf_evlist__find_tracepoint_by_id(evlist, format->id);
+		if (evsel == NULL)
+			goto next;
+
+		err = -EEXIST;
+		if (evsel->handler.func != NULL)
+			goto out_free;
+		evsel->handler.func = assocs[i].handler;
+next:
+		free(tracepoint);
+	}
+
+	err = 0;
+out:
+	return err;
+
+out_free:
+	free(tracepoint);
+	goto out;
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index c71a1a7b05ed..7c435bde6eb0 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -33,6 +33,7 @@ struct perf_session {
 	struct machine		host_machine;
 	struct rb_root		machines;
 	struct perf_evlist	*evlist;
+	struct pevent		*pevent;
 	/*
 	 * FIXME: Need to split this up further, we need global
 	 *	  stats + per event stats. 'perf diff' also needs
@@ -158,4 +159,13 @@ int perf_session__cpu_bitmap(struct perf_session *session,
 			     const char *cpu_list, unsigned long *cpu_bitmap);
 
 void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full);
+
+struct perf_evsel_str_handler;
+
+int __perf_session__set_tracepoints_handlers(struct perf_session *session,
+					     const struct perf_evsel_str_handler *assocs,
+					     size_t nr_assocs);
+
+#define perf_session__set_tracepoints_handlers(session, array) \
+	__perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array))
 #endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index df2fddbf0cd2..a51bd86f4d09 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -32,29 +32,25 @@ int header_page_size_size;
 int header_page_ts_size;
 int header_page_data_offset;
 
-struct pevent *perf_pevent;
-static struct pevent *pevent;
-
 bool latency_format;
 
-int read_trace_init(int file_bigendian, int host_bigendian)
+struct pevent *read_trace_init(int file_bigendian, int host_bigendian)
 {
-	if (pevent)
-		return 0;
-
-	perf_pevent = pevent_alloc();
-	pevent = perf_pevent;
+	struct pevent *pevent = pevent_alloc();
 
-	pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
-	pevent_set_file_bigendian(pevent, file_bigendian);
-	pevent_set_host_bigendian(pevent, host_bigendian);
+	if (pevent != NULL) {
+		pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
+		pevent_set_file_bigendian(pevent, file_bigendian);
+		pevent_set_host_bigendian(pevent, host_bigendian);
+	}
 
-	return 0;
+	return pevent;
 }
 
 static int get_common_field(struct scripting_context *context,
 			    int *offset, int *size, const char *type)
 {
+	struct pevent *pevent = context->pevent;
 	struct event_format *event;
 	struct format_field *field;
 
@@ -150,7 +146,7 @@ void *raw_field_ptr(struct event_format *event, const char *name, void *data)
 	return data + field->offset;
 }
 
-int trace_parse_common_type(void *data)
+int trace_parse_common_type(struct pevent *pevent, void *data)
 {
 	struct pevent_record record;
 
@@ -158,7 +154,7 @@ int trace_parse_common_type(void *data)
 	return pevent_data_type(pevent, &record);
 }
 
-int trace_parse_common_pid(void *data)
+int trace_parse_common_pid(struct pevent *pevent, void *data)
 {
 	struct pevent_record record;
 
@@ -166,27 +162,21 @@ int trace_parse_common_pid(void *data)
 	return pevent_data_pid(pevent, &record);
 }
 
-unsigned long long read_size(void *ptr, int size)
+unsigned long long read_size(struct pevent *pevent, void *ptr, int size)
 {
 	return pevent_read_number(pevent, ptr, size);
 }
 
-struct event_format *trace_find_event(int type)
-{
-	return pevent_find_event(pevent, type);
-}
-
-
-void print_trace_event(int cpu, void *data, int size)
+void print_trace_event(struct pevent *pevent, int cpu, void *data, int size)
 {
 	struct event_format *event;
 	struct pevent_record record;
 	struct trace_seq s;
 	int type;
 
-	type = trace_parse_common_type(data);
+	type = trace_parse_common_type(pevent, data);
 
-	event = trace_find_event(type);
+	event = pevent_find_event(pevent, type);
 	if (!event) {
 		warning("ug! no event found for type %d", type);
 		return;
@@ -203,8 +193,8 @@ void print_trace_event(int cpu, void *data, int size)
 	printf("\n");
 }
 
-void print_event(int cpu, void *data, int size, unsigned long long nsecs,
-		  char *comm)
+void print_event(struct pevent *pevent, int cpu, void *data, int size,
+		 unsigned long long nsecs, char *comm)
 {
 	struct pevent_record record;
 	struct trace_seq s;
@@ -227,7 +217,8 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs,
 	printf("\n");
 }
 
-void parse_proc_kallsyms(char *file, unsigned int size __unused)
+void parse_proc_kallsyms(struct pevent *pevent,
+			 char *file, unsigned int size __unused)
 {
 	unsigned long long addr;
 	char *func;
@@ -258,7 +249,8 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused)
 	}
 }
 
-void parse_ftrace_printk(char *file, unsigned int size __unused)
+void parse_ftrace_printk(struct pevent *pevent,
+			 char *file, unsigned int size __unused)
 {
 	unsigned long long addr;
 	char *printk;
@@ -282,17 +274,19 @@ void parse_ftrace_printk(char *file, unsigned int size __unused)
 	}
 }
 
-int parse_ftrace_file(char *buf, unsigned long size)
+int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size)
 {
 	return pevent_parse_event(pevent, buf, size, "ftrace");
 }
 
-int parse_event_file(char *buf, unsigned long size, char *sys)
+int parse_event_file(struct pevent *pevent,
+		     char *buf, unsigned long size, char *sys)
 {
 	return pevent_parse_event(pevent, buf, size, sys);
 }
 
-struct event_format *trace_find_next_event(struct event_format *event)
+struct event_format *trace_find_next_event(struct pevent *pevent,
+					   struct event_format *event)
 {
 	static int idx;
 
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index f097e0dd6c5c..719ed74a8565 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -114,20 +114,20 @@ static void skip(int size)
 	};
 }
 
-static unsigned int read4(void)
+static unsigned int read4(struct pevent *pevent)
 {
 	unsigned int data;
 
 	read_or_die(&data, 4);
-	return __data2host4(perf_pevent, data);
+	return __data2host4(pevent, data);
 }
 
-static unsigned long long read8(void)
+static unsigned long long read8(struct pevent *pevent)
 {
 	unsigned long long data;
 
 	read_or_die(&data, 8);
-	return __data2host8(perf_pevent, data);
+	return __data2host8(pevent, data);
 }
 
 static char *read_string(void)
@@ -168,12 +168,12 @@ static char *read_string(void)
 	return str;
 }
 
-static void read_proc_kallsyms(void)
+static void read_proc_kallsyms(struct pevent *pevent)
 {
 	unsigned int size;
 	char *buf;
 
-	size = read4();
+	size = read4(pevent);
 	if (!size)
 		return;
 
@@ -181,29 +181,29 @@ static void read_proc_kallsyms(void)
 	read_or_die(buf, size);
 	buf[size] = '\0';
 
-	parse_proc_kallsyms(buf, size);
+	parse_proc_kallsyms(pevent, buf, size);
 
 	free(buf);
 }
 
-static void read_ftrace_printk(void)
+static void read_ftrace_printk(struct pevent *pevent)
 {
 	unsigned int size;
 	char *buf;
 
-	size = read4();
+	size = read4(pevent);
 	if (!size)
 		return;
 
 	buf = malloc_or_die(size);
 	read_or_die(buf, size);
 
-	parse_ftrace_printk(buf, size);
+	parse_ftrace_printk(pevent, buf, size);
 
 	free(buf);
 }
 
-static void read_header_files(void)
+static void read_header_files(struct pevent *pevent)
 {
 	unsigned long long size;
 	char *header_event;
@@ -214,7 +214,7 @@ static void read_header_files(void)
 	if (memcmp(buf, "header_page", 12) != 0)
 		die("did not read header page");
 
-	size = read8();
+	size = read8(pevent);
 	skip(size);
 
 	/*
@@ -227,47 +227,48 @@ static void read_header_files(void)
 	if (memcmp(buf, "header_event", 13) != 0)
 		die("did not read header event");
 
-	size = read8();
+	size = read8(pevent);
 	header_event = malloc_or_die(size);
 	read_or_die(header_event, size);
 	free(header_event);
 }
 
-static void read_ftrace_file(unsigned long long size)
+static void read_ftrace_file(struct pevent *pevent, unsigned long long size)
 {
 	char *buf;
 
 	buf = malloc_or_die(size);
 	read_or_die(buf, size);
-	parse_ftrace_file(buf, size);
+	parse_ftrace_file(pevent, buf, size);
 	free(buf);
 }
 
-static void read_event_file(char *sys, unsigned long long size)
+static void read_event_file(struct pevent *pevent, char *sys,
+			    unsigned long long size)
 {
 	char *buf;
 
 	buf = malloc_or_die(size);
 	read_or_die(buf, size);
-	parse_event_file(buf, size, sys);
+	parse_event_file(pevent, buf, size, sys);
 	free(buf);
 }
 
-static void read_ftrace_files(void)
+static void read_ftrace_files(struct pevent *pevent)
 {
 	unsigned long long size;
 	int count;
 	int i;
 
-	count = read4();
+	count = read4(pevent);
 
 	for (i = 0; i < count; i++) {
-		size = read8();
-		read_ftrace_file(size);
+		size = read8(pevent);
+		read_ftrace_file(pevent, size);
 	}
 }
 
-static void read_event_files(void)
+static void read_event_files(struct pevent *pevent)
 {
 	unsigned long long size;
 	char *sys;
@@ -275,15 +276,15 @@ static void read_event_files(void)
 	int count;
 	int i,x;
 
-	systems = read4();
+	systems = read4(pevent);
 
 	for (i = 0; i < systems; i++) {
 		sys = read_string();
 
-		count = read4();
+		count = read4(pevent);
 		for (x=0; x < count; x++) {
-			size = read8();
-			read_event_file(sys, size);
+			size = read8(pevent);
+			read_event_file(pevent, sys, size);
 		}
 	}
 }
@@ -377,7 +378,7 @@ static int calc_index(void *ptr, int cpu)
 	return (unsigned long)ptr - (unsigned long)cpu_data[cpu].page;
 }
 
-struct pevent_record *trace_peek_data(int cpu)
+struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu)
 {
 	struct pevent_record *data;
 	void *page = cpu_data[cpu].page;
@@ -399,15 +400,15 @@ struct pevent_record *trace_peek_data(int cpu)
 		/* FIXME: handle header page */
 		if (header_page_ts_size != 8)
 			die("expected a long long type for timestamp");
-		cpu_data[cpu].timestamp = data2host8(perf_pevent, ptr);
+		cpu_data[cpu].timestamp = data2host8(pevent, ptr);
 		ptr += 8;
 		switch (header_page_size_size) {
 		case 4:
-			cpu_data[cpu].page_size = data2host4(perf_pevent, ptr);
+			cpu_data[cpu].page_size = data2host4(pevent, ptr);
 			ptr += 4;
 			break;
 		case 8:
-			cpu_data[cpu].page_size = data2host8(perf_pevent, ptr);
+			cpu_data[cpu].page_size = data2host8(pevent, ptr);
 			ptr += 8;
 			break;
 		default:
@@ -421,10 +422,10 @@ read_again:
 
 	if (idx >= cpu_data[cpu].page_size) {
 		get_next_page(cpu);
-		return trace_peek_data(cpu);
+		return trace_peek_data(pevent, cpu);
 	}
 
-	type_len_ts = data2host4(perf_pevent, ptr);
+	type_len_ts = data2host4(pevent, ptr);
 	ptr += 4;
 
 	type_len = type_len4host(type_len_ts);
@@ -434,14 +435,14 @@ read_again:
 	case RINGBUF_TYPE_PADDING:
 		if (!delta)
 			die("error, hit unexpected end of page");
-		length = data2host4(perf_pevent, ptr);
+		length = data2host4(pevent, ptr);
 		ptr += 4;
 		length *= 4;
 		ptr += length;
 		goto read_again;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
-		extend = data2host4(perf_pevent, ptr);
+		extend = data2host4(pevent, ptr);
 		ptr += 4;
 		extend <<= TS_SHIFT;
 		extend += delta;
@@ -452,7 +453,7 @@ read_again:
 		ptr += 12;
 		break;
 	case 0:
-		length = data2host4(perf_pevent, ptr);
+		length = data2host4(pevent, ptr);
 		ptr += 4;
 		die("here! length=%d", length);
 		break;
@@ -477,17 +478,17 @@ read_again:
 	return data;
 }
 
-struct pevent_record *trace_read_data(int cpu)
+struct pevent_record *trace_read_data(struct pevent *pevent, int cpu)
 {
 	struct pevent_record *data;
 
-	data = trace_peek_data(cpu);
+	data = trace_peek_data(pevent, cpu);
 	cpu_data[cpu].next = NULL;
 
 	return data;
 }
 
-ssize_t trace_report(int fd, bool __repipe)
+ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
 {
 	char buf[BUFSIZ];
 	char test[] = { 23, 8, 68 };
@@ -519,30 +520,32 @@ ssize_t trace_report(int fd, bool __repipe)
 	file_bigendian = buf[0];
 	host_bigendian = bigendian();
 
-	read_trace_init(file_bigendian, host_bigendian);
+	*ppevent = read_trace_init(file_bigendian, host_bigendian);
+	if (*ppevent == NULL)
+		die("read_trace_init failed");
 
 	read_or_die(buf, 1);
 	long_size = buf[0];
 
-	page_size = read4();
+	page_size = read4(*ppevent);
 
-	read_header_files();
+	read_header_files(*ppevent);
 
-	read_ftrace_files();
-	read_event_files();
-	read_proc_kallsyms();
-	read_ftrace_printk();
+	read_ftrace_files(*ppevent);
+	read_event_files(*ppevent);
+	read_proc_kallsyms(*ppevent);
+	read_ftrace_printk(*ppevent);
 
 	size = calc_data_size - 1;
 	calc_data_size = 0;
 	repipe = false;
 
 	if (show_funcs) {
-		pevent_print_funcs(perf_pevent);
+		pevent_print_funcs(*ppevent);
 		return size;
 	}
 	if (show_printk) {
-		pevent_print_printk(perf_pevent);
+		pevent_print_printk(*ppevent);
 		return size;
 	}
 
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 18ae6c1831d3..474aa7a7df43 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -36,6 +36,7 @@ static int stop_script_unsupported(void)
 }
 
 static void process_event_unsupported(union perf_event *event __unused,
+				      struct pevent *pevent __unused,
 				      struct perf_sample *sample __unused,
 				      struct perf_evsel *evsel __unused,
 				      struct machine *machine __unused,
@@ -61,7 +62,8 @@ static int python_start_script_unsupported(const char *script __unused,
 	return -1;
 }
 
-static int python_generate_script_unsupported(const char *outfile __unused)
+static int python_generate_script_unsupported(struct pevent *pevent __unused,
+					      const char *outfile __unused)
 {
 	print_python_unsupported_msg();
 
@@ -122,7 +124,8 @@ static int perl_start_script_unsupported(const char *script __unused,
 	return -1;
 }
 
-static int perl_generate_script_unsupported(const char *outfile __unused)
+static int perl_generate_script_unsupported(struct pevent *pevent __unused,
+					    const char *outfile __unused)
 {
 	print_perl_unsupported_msg();
 
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 639852ac1117..8fef1d6687b7 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -8,6 +8,7 @@
 struct machine;
 struct perf_sample;
 union perf_event;
+struct perf_tool;
 struct thread;
 
 extern int header_page_size_size;
@@ -29,35 +30,36 @@ enum {
 
 int bigendian(void);
 
-int read_trace_init(int file_bigendian, int host_bigendian);
-void print_trace_event(int cpu, void *data, int size);
+struct pevent *read_trace_init(int file_bigendian, int host_bigendian);
+void print_trace_event(struct pevent *pevent, int cpu, void *data, int size);
 
-void print_event(int cpu, void *data, int size, unsigned long long nsecs,
-		  char *comm);
+void print_event(struct pevent *pevent, int cpu, void *data, int size,
+		 unsigned long long nsecs, char *comm);
 
-int parse_ftrace_file(char *buf, unsigned long size);
-int parse_event_file(char *buf, unsigned long size, char *sys);
+int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size);
+int parse_event_file(struct pevent *pevent,
+		     char *buf, unsigned long size, char *sys);
 
-struct pevent_record *trace_peek_data(int cpu);
-struct event_format *trace_find_event(int type);
+struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu);
 
 unsigned long long
 raw_field_value(struct event_format *event, const char *name, void *data);
 void *raw_field_ptr(struct event_format *event, const char *name, void *data);
 
-void parse_proc_kallsyms(char *file, unsigned int size __unused);
-void parse_ftrace_printk(char *file, unsigned int size __unused);
+void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size);
+void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size);
 
-ssize_t trace_report(int fd, bool repipe);
+ssize_t trace_report(int fd, struct pevent **pevent, bool repipe);
 
-int trace_parse_common_type(void *data);
-int trace_parse_common_pid(void *data);
+int trace_parse_common_type(struct pevent *pevent, void *data);
+int trace_parse_common_pid(struct pevent *pevent, void *data);
 
-struct event_format *trace_find_next_event(struct event_format *event);
-unsigned long long read_size(void *ptr, int size);
+struct event_format *trace_find_next_event(struct pevent *pevent,
+					   struct event_format *event);
+unsigned long long read_size(struct pevent *pevent, void *ptr, int size);
 unsigned long long eval_flag(const char *flag);
 
-struct pevent_record *trace_read_data(int cpu);
+struct pevent_record *trace_read_data(struct pevent *pevent, int cpu);
 int read_tracing_data(int fd, struct list_head *pattrs);
 
 struct tracing_data {
@@ -77,11 +79,12 @@ struct scripting_ops {
 	int (*start_script) (const char *script, int argc, const char **argv);
 	int (*stop_script) (void);
 	void (*process_event) (union perf_event *event,
+			       struct pevent *pevent,
 			       struct perf_sample *sample,
 			       struct perf_evsel *evsel,
 			       struct machine *machine,
 			       struct thread *thread);
-	int (*generate_script) (const char *outfile);
+	int (*generate_script) (struct pevent *pevent, const char *outfile);
 };
 
 int script_spec_register(const char *spec, struct scripting_ops *ops);
@@ -90,6 +93,7 @@ void setup_perl_scripting(void);
 void setup_python_scripting(void);
 
 struct scripting_context {
+	struct pevent *pevent;
 	void *event_data;
 };
 
-- 
cgit v1.2.3


From 209bd9e3e14712d74c8bebb028afda905d689f1c Mon Sep 17 00:00:00 2001
From: "Pierre-Loup A. Griffais" <pgriffais@nvidia.com>
Date: Fri, 22 Jun 2012 11:38:13 -0700
Subject: perf symbols: Follow .gnu_debuglink section to find separate symbols

The .gnu_debuglink section is specified to contain the filename of the
debug info file, as well as a CRC that can be used to validate it.

This doesn't currently use the checksum and relies on the usual build-id
matching for validation.

This provides more context:
http://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html

Signed-off-by: Pierre-Loup A. Griffais <pgriffais@nvidia.com>
Reported-by: Mike Sartain <mikesart@valvesoftware.com>
Tested-by: Mike Sartain <mikesart@valvesoftware.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Sartain <mikesart@valvesoftware.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/4FE4BB95.3080309@nvidia.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++-
 tools/perf/util/symbol.h |  1 +
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 3e2e5ea0f03f..994f4ffdcd05 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1590,11 +1590,62 @@ out:
 	return err;
 }
 
+static int filename__read_debuglink(const char *filename,
+				    char *debuglink, size_t size)
+{
+	int fd, err = -1;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *sec;
+	Elf_Kind ek;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		goto out;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
+		goto out_close;
+	}
+
+	ek = elf_kind(elf);
+	if (ek != ELF_K_ELF)
+		goto out_close;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		pr_err("%s: cannot get elf header.\n", __func__);
+		goto out_close;
+	}
+
+	sec = elf_section_by_name(elf, &ehdr, &shdr,
+				  ".gnu_debuglink", NULL);
+	if (sec == NULL)
+		goto out_close;
+
+	data = elf_getdata(sec, NULL);
+	if (data == NULL)
+		goto out_close;
+
+	/* the start of this section is a zero-terminated string */
+	strncpy(debuglink, data->d_buf, size);
+
+	elf_end(elf);
+
+out_close:
+	close(fd);
+out:
+	return err;
+}
+
 char dso__symtab_origin(const struct dso *dso)
 {
 	static const char origin[] = {
 		[SYMTAB__KALLSYMS]	      = 'k',
 		[SYMTAB__JAVA_JIT]	      = 'j',
+		[SYMTAB__DEBUGLINK]           = 'l',
 		[SYMTAB__BUILD_ID_CACHE]      = 'B',
 		[SYMTAB__FEDORA_DEBUGINFO]    = 'f',
 		[SYMTAB__UBUNTU_DEBUGINFO]    = 'u',
@@ -1662,10 +1713,22 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 	 */
 	want_symtab = 1;
 restart:
-	for (dso->symtab_type = SYMTAB__BUILD_ID_CACHE;
+	for (dso->symtab_type = SYMTAB__DEBUGLINK;
 	     dso->symtab_type != SYMTAB__NOT_FOUND;
 	     dso->symtab_type++) {
 		switch (dso->symtab_type) {
+		case SYMTAB__DEBUGLINK: {
+			char *debuglink;
+			strncpy(name, dso->long_name, size);
+			debuglink = name + dso->long_name_len;
+			while (debuglink != name && *debuglink != '/')
+				debuglink--;
+			if (*debuglink == '/')
+				debuglink++;
+			filename__read_debuglink(dso->long_name, debuglink,
+						 size - (debuglink - name));
+			}
+			break;
 		case SYMTAB__BUILD_ID_CACHE:
 			/* skip the locally configured cache if a symfs is given */
 			if (symbol_conf.symfs[0] ||
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index af0752b1aca1..a884b99017f0 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -257,6 +257,7 @@ enum symtab_type {
 	SYMTAB__KALLSYMS = 0,
 	SYMTAB__GUEST_KALLSYMS,
 	SYMTAB__JAVA_JIT,
+	SYMTAB__DEBUGLINK,
 	SYMTAB__BUILD_ID_CACHE,
 	SYMTAB__FEDORA_DEBUGINFO,
 	SYMTAB__UBUNTU_DEBUGINFO,
-- 
cgit v1.2.3


From 08942f6d5d992e9486b07653fd87ea8182a22fa0 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Wed, 20 Jun 2012 15:08:06 +0900
Subject: perf bench: Documentation update

The current perf-bench documentation has a couple of typos and even
lacks entire description of mem subsystem. Fix it.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340172486-17805-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-bench.txt | 78 +++++++++++++++++++++++++++++++--
 tools/perf/bench/mem-memcpy.c           |  4 +-
 tools/perf/bench/mem-memset.c           |  8 ++--
 tools/perf/builtin-bench.c              |  4 +-
 4 files changed, 83 insertions(+), 11 deletions(-)

diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index a3dbadb26ef5..f3c716a4cad3 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -12,7 +12,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-This 'perf bench' command is general framework for benchmark suites.
+This 'perf bench' command is a general framework for benchmark suites.
 
 COMMON OPTIONS
 --------------
@@ -45,14 +45,20 @@ SUBSYSTEM
 'sched'::
 	Scheduler and IPC mechanisms.
 
+'mem'::
+	Memory access performance.
+
+'all'::
+	All benchmark subsystems.
+
 SUITES FOR 'sched'
 ~~~~~~~~~~~~~~~~~~
 *messaging*::
 Suite for evaluating performance of scheduler and IPC mechanisms.
 Based on hackbench by Rusty Russell.
 
-Options of *pipe*
-^^^^^^^^^^^^^^^^^
+Options of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
 -p::
 --pipe::
 Use pipe() instead of socketpair()
@@ -115,6 +121,72 @@ Example of *pipe*
                 59004 ops/sec
 ---------------------
 
+SUITES FOR 'mem'
+~~~~~~~~~~~~~~~~
+*memcpy*::
+Suite for evaluating performance of simple memory copy in various ways.
+
+Options of *memcpy*
+^^^^^^^^^^^^^^^^^^^
+-l::
+--length::
+Specify length of memory to copy (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-r::
+--routine::
+Specify routine to copy (default: default).
+Available routines are depend on the architecture.
+On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported.
+
+-i::
+--iterations::
+Repeat memcpy invocation this number of times.
+
+-c::
+--clock::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
+-o::
+--only-prefault::
+Show only the result with page faults before memcpy.
+
+-n::
+--no-prefault::
+Show only the result without page faults before memcpy.
+
+*memset*::
+Suite for evaluating performance of simple memory set in various ways.
+
+Options of *memset*
+^^^^^^^^^^^^^^^^^^^
+-l::
+--length::
+Specify length of memory to set (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-r::
+--routine::
+Specify routine to set (default: default).
+Available routines are depend on the architecture.
+On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported.
+
+-i::
+--iterations::
+Repeat memset invocation this number of times.
+
+-c::
+--clock::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
+-o::
+--only-prefault::
+Show only the result with page faults before memset.
+
+-n::
+--no-prefault::
+Show only the result without page faults before memset.
+
 SEE ALSO
 --------
 linkperf:perf[1]
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 71557225bf92..d990365cafa0 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -32,13 +32,13 @@ static bool		no_prefault;
 static const struct option options[] = {
 	OPT_STRING('l', "length", &length_str, "1MB",
 		    "Specify length of memory to copy. "
-		    "available unit: B, MB, GB (upper and lower)"),
+		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
 	OPT_STRING('r', "routine", &routine, "default",
 		    "Specify routine to copy"),
 	OPT_INTEGER('i', "iterations", &iterations,
 		    "repeat memcpy() invocation this number of times"),
 	OPT_BOOLEAN('c', "clock", &use_clock,
-		    "Use CPU clock for measuring"),
+		    "Use cycles event instead of gettimeofday() for measuring"),
 	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
 		    "Show only the result with page faults before memcpy()"),
 	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
index e9079185bd72..bf0d5f552017 100644
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -31,14 +31,14 @@ static bool		no_prefault;
 
 static const struct option options[] = {
 	OPT_STRING('l', "length", &length_str, "1MB",
-		    "Specify length of memory to copy. "
-		    "available unit: B, MB, GB (upper and lower)"),
+		    "Specify length of memory to set. "
+		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
 	OPT_STRING('r', "routine", &routine, "default",
-		    "Specify routine to copy"),
+		    "Specify routine to set"),
 	OPT_INTEGER('i', "iterations", &iterations,
 		    "repeat memset() invocation this number of times"),
 	OPT_BOOLEAN('c', "clock", &use_clock,
-		    "Use CPU clock for measuring"),
+		    "Use cycles event instead of gettimeofday() for measuring"),
 	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
 		    "Show only the result with page faults before memset()"),
 	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b0e74ab2d7a2..1f3100216448 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -33,7 +33,7 @@ struct bench_suite {
 };
 						\
 /* sentinel: easy for help */
-#define suite_all { "all", "test all suite (pseudo suite)", NULL }
+#define suite_all { "all", "Test all benchmark suites", NULL }
 
 static struct bench_suite sched_suites[] = {
 	{ "messaging",
@@ -75,7 +75,7 @@ static struct bench_subsys subsystems[] = {
 	  "memory access performance",
 	  mem_suites },
 	{ "all",		/* sentinel: easy for help */
-	  "test all subsystem (pseudo subsystem)",
+	  "all benchmark subsystem",
 	  NULL },
 	{ NULL,
 	  NULL,
-- 
cgit v1.2.3


From 300aa941650e98966ad85847527537df5b11a87e Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 11 Jun 2012 13:48:41 -0600
Subject: perf report: Delay sample_type checks in pipe mode

The pipeline:
  perf record -a -g -o - sleep 5 |perf inject -v -b  | perf report  -g -i -

generates the warning:
  Selected -g but no callchain data. Did you call 'perf record' without -g?

The problem is that the header data is not written to the pipe, so the
sample_type has not been available when perf_report__setup_sample_type
is called. For pipe mode, record dumps the sample type as part of the
synthesized events stream -- perf_event__synthesize_attrs(). Handle this
be detecting pipe mode and not doing early sanity checks on sample_type.

Signed-off-by: David Ahern <dsahern@gmail.com>
Tested-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Link: http://lkml.kernel.org/r/1339444121-26236-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-report.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 40b0ffc3ad3b..69b1c1185159 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -245,11 +245,12 @@ static int process_read_event(struct perf_tool *tool,
 	return 0;
 }
 
+/* For pipe mode, sample_type is not currently set */
 static int perf_report__setup_sample_type(struct perf_report *rep)
 {
 	struct perf_session *self = rep->session;
 
-	if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
+	if (!self->fd_pipe && !(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
 		if (sort__has_parent) {
 			ui__error("Selected --sort parent, but no "
 				    "callchain data. Did you call "
@@ -272,7 +273,8 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
 	}
 
 	if (sort__branch_mode == 1) {
-		if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
+		if (!self->fd_pipe &&
+		    !(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
 			ui__error("Selected -b but no branch data. "
 				  "Did you call perf record without -b?\n");
 			return -1;
-- 
cgit v1.2.3


From d9873ab79376d5c0112ed09e14783067dc65e808 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 11 Jun 2012 19:13:32 -0600
Subject: perf tools: Trivial build fix

References to OUTPUT should not be followed by a '/'. When a build
output directory is not specified for this case you get:

gcc -o builtin-annotate.o -c ... -I/util ...

which is wrong.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Link: http://lkml.kernel.org/r/1339463612-30937-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index d698c118a602..75d74e5db8d5 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -155,7 +155,7 @@ endif
 
 ### --- END CONFIGURATION SECTION ---
 
-BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)/util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
 BASIC_LDFLAGS =
 
 # Guard against environment variables
-- 
cgit v1.2.3


From 0be61ebc18b919dddbdbcd1c4f42513c310ecf59 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 18 Jun 2012 09:28:16 -0400
Subject: tracing/selftest: Add a WARN_ON() if a tracer test fails

Add a WARN_ON() output on test failures so that they are easier to detect
in automated tests. Although, the WARN_ON() will not print if the test
causes the system to crash, obviously.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 49249c28690d..748f6401edf6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -830,6 +830,8 @@ int register_tracer(struct tracer *type)
 		current_trace = saved_tracer;
 		if (ret) {
 			printk(KERN_CONT "FAILED!\n");
+			/* Add the warning after printing 'FAILED' */
+			WARN_ON(1);
 			goto out;
 		}
 		/* Only reset on passing, to avoid touching corrupted buffers */
-- 
cgit v1.2.3


From b102f1d0f1cd0bb5ec82e5aeb1e33502d6ad6710 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Wed, 27 Jun 2012 09:41:39 +0900
Subject: tracing/kvm: Use __print_hex() for kvm_emulate_insn tracepoint

The kvm_emulate_insn tracepoint used __print_insn()
for printing its instructions. However it makes the
format of the event hard to parse as it reveals TP
internals.

Fortunately, kernel provides __print_hex for almost
same purpose, we can use it instead of open coding
it. The user-space can be changed to parse it later.

That means raw kernel tracing will not be affected
by this change:

 # cd /sys/kernel/debug/tracing/
 # cat events/kvm/kvm_emulate_insn/format
 name: kvm_emulate_insn
 ID: 29
 format:
	...
 print fmt: "%x:%llx:%s (%s)%s", REC->csbase, REC->rip, __print_hex(REC->insn, REC->len), \
 __print_symbolic(REC->flags, { 0, "real" }, { (1 << 0) | (1 << 1), "vm16" }, \
 { (1 << 0), "prot16" }, { (1 << 0) | (1 << 2), "prot32" }, { (1 << 0) | (1 << 3), "prot64" }), \
 REC->failed ? " failed" : ""

 # echo 1 > events/kvm/kvm_emulate_insn/enable
 # cat trace
 # tracer: nop
 #
 # entries-in-buffer/entries-written: 2183/2183   #P:12
 #
 #                              _-----=> irqs-off
 #                             / _----=> need-resched
 #                            | / _---=> hardirq/softirq
 #                            || / _--=> preempt-depth
 #                            ||| /     delay
 #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
 #              | |       |   ||||       |         |
         qemu-kvm-1782  [002] ...1   140.931636: kvm_emulate_insn: 0:c102fa25:89 10 (prot32)
         qemu-kvm-1781  [004] ...1   140.931637: kvm_emulate_insn: 0:c102fa25:89 10 (prot32)

Link: http://lkml.kernel.org/n/tip-wfw6y3b9ugtey8snaow9nmg5@git.kernel.org
Link: http://lkml.kernel.org/r/1340757701-10711-2-git-send-email-namhyung@kernel.org

Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: kvm@vger.kernel.org
Acked-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kvm/trace.h   | 12 +-----------
 include/trace/ftrace.h |  1 +
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 911d2641f14c..62d02e3c3ed6 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -710,16 +710,6 @@ TRACE_EVENT(kvm_skinit,
 		  __entry->rip, __entry->slb)
 );
 
-#define __print_insn(insn, ilen) ({		                 \
-	int i;							 \
-	const char *ret = p->buffer + p->len;			 \
-								 \
-	for (i = 0; i < ilen; ++i)				 \
-		trace_seq_printf(p, " %02x", insn[i]);		 \
-	trace_seq_printf(p, "%c", 0);				 \
-	ret;							 \
-	})
-
 #define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
 #define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
 #define KVM_EMUL_INSN_F_CS_D   (1 << 2)
@@ -786,7 +776,7 @@ TRACE_EVENT(kvm_emulate_insn,
 
 	TP_printk("%x:%llx:%s (%s)%s",
 		  __entry->csbase, __entry->rip,
-		  __print_insn(__entry->insn, __entry->len),
+		  __print_hex(__entry->insn, __entry->len),
 		  __print_symbolic(__entry->flags,
 				   kvm_trace_symbol_emul_flags),
 		  __entry->failed ? " failed" : ""
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 769724944fc6..c6bc2faaf261 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -571,6 +571,7 @@ static inline void ftrace_test_probe_##call(void)			\
 
 #undef __print_flags
 #undef __print_symbolic
+#undef __print_hex
 #undef __get_dynamic_array
 #undef __get_str
 
-- 
cgit v1.2.3


From 6d158a813efcd09661c23f16ddf7e2ff834cb20c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 27 Jun 2012 20:46:14 -0400
Subject: tracing: Remove NR_CPUS array from trace_iterator

Replace the NR_CPUS array of buffer_iter from the trace_iterator
with an allocated array. This will just create an array of
possible CPUS instead of the max number specified.

The use of NR_CPUS in that array caused allocation failures for
machines that were tight on memory. This did not cause any failures
to the system itself (no crashes), but caused unnecessary failures
for reading the trace files.

Added a helper function called 'trace_buffer_iter()' that returns
the buffer_iter item or NULL if it is not defined or the array was
not allocated. Some routines do not require the array
(tracing_open_pipe() for one).

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h         |  2 +-
 kernel/trace/trace.c                 | 27 ++++++++++++++++++---------
 kernel/trace/trace.h                 |  8 ++++++++
 kernel/trace/trace_functions_graph.c |  2 +-
 4 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 1aff18346c71..af961d6f7ab1 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -65,7 +65,7 @@ struct trace_iterator {
 	void			*private;
 	int			cpu_file;
 	struct mutex		mutex;
-	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
+	struct ring_buffer_iter	**buffer_iter;
 	unsigned long		iter_flags;
 
 	/* trace_seq for __print_flags() and __print_symbolic() etc. */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 748f6401edf6..b2af14e94c28 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1710,9 +1710,11 @@ EXPORT_SYMBOL_GPL(trace_vprintk);
 
 static void trace_iterator_increment(struct trace_iterator *iter)
 {
+	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
+
 	iter->idx++;
-	if (iter->buffer_iter[iter->cpu])
-		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+	if (buf_iter)
+		ring_buffer_read(buf_iter, NULL);
 }
 
 static struct trace_entry *
@@ -1720,7 +1722,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
 		unsigned long *lost_events)
 {
 	struct ring_buffer_event *event;
-	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
+	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
 
 	if (buf_iter)
 		event = ring_buffer_iter_peek(buf_iter, ts);
@@ -1858,10 +1860,10 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 
 	tr->data[cpu]->skipped_entries = 0;
 
-	if (!iter->buffer_iter[cpu])
+	buf_iter = trace_buffer_iter(iter, cpu);
+	if (!buf_iter)
 		return;
 
-	buf_iter = iter->buffer_iter[cpu];
 	ring_buffer_iter_reset(buf_iter);
 
 	/*
@@ -2207,13 +2209,15 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 
 int trace_empty(struct trace_iterator *iter)
 {
+	struct ring_buffer_iter *buf_iter;
 	int cpu;
 
 	/* If we are looking at one CPU buffer, only check that one */
 	if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
 		cpu = iter->cpu_file;
-		if (iter->buffer_iter[cpu]) {
-			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+		buf_iter = trace_buffer_iter(iter, cpu);
+		if (buf_iter) {
+			if (!ring_buffer_iter_empty(buf_iter))
 				return 0;
 		} else {
 			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
@@ -2223,8 +2227,9 @@ int trace_empty(struct trace_iterator *iter)
 	}
 
 	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu]) {
-			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+		buf_iter = trace_buffer_iter(iter, cpu);
+		if (buf_iter) {
+			if (!ring_buffer_iter_empty(buf_iter))
 				return 0;
 		} else {
 			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
@@ -2383,6 +2388,8 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (!iter)
 		return ERR_PTR(-ENOMEM);
 
+	iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
+				    GFP_KERNEL);
 	/*
 	 * We make a copy of the current tracer to avoid concurrent
 	 * changes on it while we are reading.
@@ -2443,6 +2450,7 @@ __tracing_open(struct inode *inode, struct file *file)
  fail:
 	mutex_unlock(&trace_types_lock);
 	kfree(iter->trace);
+	kfree(iter->buffer_iter);
 	seq_release_private(inode, file);
 	return ERR_PTR(-ENOMEM);
 }
@@ -2483,6 +2491,7 @@ static int tracing_release(struct inode *inode, struct file *file)
 	mutex_destroy(&iter->mutex);
 	free_cpumask_var(iter->started);
 	kfree(iter->trace);
+	kfree(iter->buffer_iter);
 	seq_release_private(inode, file);
 	return 0;
 }
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5aec220d2de0..55e1f7f0db12 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -317,6 +317,14 @@ struct tracer {
 
 #define TRACE_PIPE_ALL_CPU	-1
 
+static inline struct ring_buffer_iter *
+trace_buffer_iter(struct trace_iterator *iter, int cpu)
+{
+	if (iter->buffer_iter && iter->buffer_iter[cpu])
+		return iter->buffer_iter[cpu];
+	return NULL;
+}
+
 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a7d2a4c653d8..ce27c8ba8d31 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -538,7 +538,7 @@ get_return_for_leaf(struct trace_iterator *iter,
 		next = &data->ret;
 	} else {
 
-		ring_iter = iter->buffer_iter[iter->cpu];
+		ring_iter = trace_buffer_iter(iter, iter->cpu);
 
 		/* First peek to compare current entry and the next one */
 		if (ring_iter)
-- 
cgit v1.2.3


From a5fb833172eca69136e9ee1ada778e404086ab8a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 28 Jun 2012 13:35:04 -0400
Subject: ring-buffer: Fix uninitialized read_stamp

The ring buffer reader page is used to swap a page from the writable
ring buffer. If the writer happens to be on that page, it ends up on the
reader page, but will simply move off of it, back into the writable ring
buffer as writes are added.

The time stamp passed back to the readers is stored in the cpu_buffer per
CPU descriptor. This stamp is updated when a swap of the reader page takes
place, and it reads the current stamp from the page taken from the writable
ring buffer. Everytime a writer goes to a new page, it updates the time stamp
of that page.

The problem happens if a reader reads a page from an empty per CPU ring buffer.
If the buffer is empty, the swap still takes place, placing the writer at the
start of the reader page. If at a later time, a write happens, it updates the
page's time stamp and continues. But the problem is that the read_stamp does
not get updated, because the page was already swapped.

The solution to this was to not swap the page if the ring buffer happens to
be empty. This also removes the side effect that the writes on the reader
page will not get updated because the writer never gets back on the reader
page without a swap. That is, if a read happens on an empty buffer, but then
no reads happen for a while. If a swap took place, and the writer were to start
writing a lot of data (function tracer), it will start overflowing the ring buffer
and overwrite the older data. But because the writer never goes back onto the
reader page, the data left on the reader page never gets overwritten. This
causes the reader to see really old data, followed by a jump to newer data.

Link: http://lkml.kernel.org/r/1340060577-9112-1-git-send-email-dhsharp@google.com
Google-Bug-Id: 6410455
Reported-by: David Sharp <dhsharp@google.com>
tested-by: David Sharp <dhsharp@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1d0f6a8a0e5e..82a3e0c56b1d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3239,6 +3239,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	if (cpu_buffer->commit_page == cpu_buffer->reader_page)
 		goto out;
 
+	/* Don't bother swapping if the ring buffer is empty */
+	if (rb_num_of_entries(cpu_buffer) == 0)
+		goto out;
+
 	/*
 	 * Reset the reader page to size zero.
 	 */
-- 
cgit v1.2.3


From f526a4ce2643e2636c091cf2bf0ec1442110c5b7 Mon Sep 17 00:00:00 2001
From: Konstantin Stepanyuk <konstantin.stepanyuk@gmail.com>
Date: Wed, 27 Jun 2012 15:52:14 -0600
Subject: tools lib traceevent: Fix clean target in Makefile

Dependency files were not cleaned up. Add missing space to fix the issue.

Signed-off-by: Konstantin Stepanyuk <konstantin.stepanyuk@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1340833934-18783-1-git-send-email-konstantin.stepanyuk@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 3d69aa9ff51e..423f4b81ecce 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -290,7 +290,7 @@ install_lib: all_cmd install_plugins install_python
 install: install_lib
 
 clean:
-	$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES).*.d
+	$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d
 	$(RM) tags TAGS
 
 endif # skip-makefile
-- 
cgit v1.2.3


From 6545e3a8f0666b60b26202a5271a94f7cd9601a8 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Fri, 22 Jun 2012 17:10:14 +0900
Subject: tools lib traceevent: Teach [ce]tags about libtraceeevent error codes

As we use a macro trick to sync each error codes with its description
string, teach [ce]tags to process them properly.

This patch modifies the libtraceevent's Makefile not a kernel one.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-3101nsbg52glxdqih291qj74@git.kernel.org
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1340352615-20737-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/Makefile | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 423f4b81ecce..34a577e7d554 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -270,11 +270,13 @@ endif
 
 tags:	force
 	$(RM) tags
-	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px
+	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
+	--regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/'
 
 TAGS:	force
 	$(RM) TAGS
-	find . -name '*.[ch]' | xargs etags
+	find . -name '*.[ch]' | xargs etags \
+	--regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/'
 
 define do_install
 	$(print_install)				\
-- 
cgit v1.2.3


From 860df5833e461beba4bf9f3304a7919da98fd789 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Fri, 22 Jun 2012 14:37:36 +0900
Subject: tools lib traceevent: Make dependency files regeneratable

Ingo reported that libtraceevent doesn't clean out dependency (.d) files
and it can cause a build error when the libgcc package upgraded:

 comet:~/tip/tools/perf> make -j
     SUBDIR ../lib/traceevent/
 make[1]: *** No rule to make target `/usr/lib/gcc/x86_64-redhat-linux/4.7.0/include/stddef.h',
  needed by `event-parse.o'.  Stop.
 make: *** [../lib/traceevent//libtraceevent.a] Error 2

So this patch makes the .d files depends on the source and header files
also, so that it can be re-generated as needed.

NOTE: This code is copied from the GNU make manual page
(4.14 Generating Prerequisites Automatically).

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340343462-15556-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/Makefile | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 34a577e7d554..46c2f6b7b123 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -250,8 +250,12 @@ endef
 all_objs := $(sort $(ALL_OBJS))
 all_deps := $(all_objs:%.o=.%.d)
 
+# let .d file also depends on the source and header files
 define check_deps
-		$(CC) -M $(CFLAGS) $< > $@;
+		@set -e; $(RM) $@; \
+		$(CC) -M $(CFLAGS) $< > $@.$$$$; \
+		sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
+		$(RM) $@.$$$$
 endef
 
 $(gui_deps): ks_version.h
-- 
cgit v1.2.3


From 600da3cfe19496485c5d8d52ff703590a0bd53f6 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Fri, 22 Jun 2012 17:10:15 +0900
Subject: tools lib traceevent: Check string is really printable

When libtraceevent parses format fields, it assumes that array of 1 byte
is string but it's not always true. The kvm_emulate_insn contains 15 u8
array of insn that contains (binary) instructions. Thus when it's
printed, it'll have broken output like below:

  kvm_emulate_insn:     [FAILED TO PARSE] rip=3238197797 csbase=0 len=2 \
  insn=<89>P^]<B4>& flags=5 failed=0

With this patch:

  kvm_emulate_insn:     [FAILED TO PARSE] rip=3238197797 csbase=0 len=2 \
  insn=ARRAY[89, 10, 5d, c3, 8d, b4, 26, 00, 00, 00, 00, 55, 89, e5, 3e] flags=5 failed=0

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1340352615-20737-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 554828219c33..b203a50c0f22 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3589,6 +3589,16 @@ static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
 	trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
 }
 
+static int is_printable_array(char *p, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; i < len && p[i]; i++)
+		if (!isprint(p[i]))
+		    return 0;
+	return 1;
+}
+
 static void print_event_fields(struct trace_seq *s, void *data, int size,
 			       struct event_format *event)
 {
@@ -3608,7 +3618,8 @@ static void print_event_fields(struct trace_seq *s, void *data, int size,
 				len = offset >> 16;
 				offset &= 0xffff;
 			}
-			if (field->flags & FIELD_IS_STRING) {
+			if (field->flags & FIELD_IS_STRING &&
+			    is_printable_array(data + offset, len)) {
 				trace_seq_printf(s, "%s", (char *)data + offset);
 			} else {
 				trace_seq_puts(s, "ARRAY[");
@@ -3619,6 +3630,7 @@ static void print_event_fields(struct trace_seq *s, void *data, int size,
 							 *((unsigned char *)data + offset + i));
 				}
 				trace_seq_putc(s, ']');
+				field->flags &= ~FIELD_IS_STRING;
 			}
 		} else {
 			val = pevent_read_number(event->pevent, data + field->offset,
-- 
cgit v1.2.3


From b700807196ac8d87e00fed9fda80ab89b7f56db6 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Wed, 27 Jun 2012 09:41:40 +0900
Subject: tools lib traceevent: Use local variable 'field'

Use local variable 'field' to reduce typing. It is needed by later patch
not to exceed 80 column.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1340757701-10711-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index b203a50c0f22..63d02be5480f 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3214,6 +3214,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 {
 	struct pevent *pevent = event->pevent;
 	struct print_flag_sym *flag;
+	struct format_field *field;
 	unsigned long long val, fval;
 	unsigned long addr;
 	char *str;
@@ -3228,27 +3229,29 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		print_str_to_seq(s, format, len_arg, arg->atom.atom);
 		return;
 	case PRINT_FIELD:
-		if (!arg->field.field) {
-			arg->field.field = pevent_find_any_field(event, arg->field.name);
-			if (!arg->field.field)
+		field = arg->field.field;
+		if (!field) {
+			field = pevent_find_any_field(event, arg->field.name);
+			if (!field)
 				die("field %s not found", arg->field.name);
+			arg->field.field = field;
 		}
 		/* Zero sized fields, mean the rest of the data */
-		len = arg->field.field->size ? : size - arg->field.field->offset;
+		len = field->size ? : size - field->offset;
 
 		/*
 		 * Some events pass in pointers. If this is not an array
 		 * and the size is the same as long_size, assume that it
 		 * is a pointer.
 		 */
-		if (!(arg->field.field->flags & FIELD_IS_ARRAY) &&
-		    arg->field.field->size == pevent->long_size) {
-			addr = *(unsigned long *)(data + arg->field.field->offset);
+		if (!(field->flags & FIELD_IS_ARRAY) &&
+		    field->size == pevent->long_size) {
+			addr = *(unsigned long *)(data + field->offset);
 			trace_seq_printf(s, "%lx", addr);
 			break;
 		}
 		str = malloc_or_die(len + 1);
-		memcpy(str, data + arg->field.field->offset, len);
+		memcpy(str, data + field->offset, len);
 		str[len] = 0;
 		print_str_to_seq(s, format, len_arg, str);
 		free(str);
-- 
cgit v1.2.3


From e080e6f1c863242ff709046d0486d09c46dc484a Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Wed, 27 Jun 2012 09:41:41 +0900
Subject: tools lib traceevent: Add support for __print_hex()

Since the __print_hex() function is used in print fmt now, add
corresponding parser routines. This makes the output of perf script on
the kvm_emulate_insn event not to fail any more.

 before:
      kvm_emulate_insn:     [FAILED TO PARSE] rip=3238197797 ...

 after:
      kvm_emulate_insn:     0:c102fa25:89 10 (prot32)

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1340757701-10711-4-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c                 | 75 +++++++++++++++++++++-
 tools/lib/traceevent/event-parse.h                 |  7 ++
 .../perf/util/scripting-engines/trace-event-perl.c |  4 ++
 .../util/scripting-engines/trace-event-python.c    |  4 ++
 4 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 63d02be5480f..b1abd39923d6 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -697,6 +697,10 @@ static void free_arg(struct print_arg *arg)
 		free_arg(arg->symbol.field);
 		free_flag_sym(arg->symbol.symbols);
 		break;
+	case PRINT_HEX:
+		free_arg(arg->hex.field);
+		free_arg(arg->hex.size);
+		break;
 	case PRINT_TYPE:
 		free(arg->typecast.type);
 		free_arg(arg->typecast.item);
@@ -2259,6 +2263,45 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok)
 	return EVENT_ERROR;
 }
 
+static enum event_type
+process_hex(struct event_format *event, struct print_arg *arg, char **tok)
+{
+	struct print_arg *field;
+	enum event_type type;
+	char *token;
+
+	memset(arg, 0, sizeof(*arg));
+	arg->type = PRINT_HEX;
+
+	field = alloc_arg();
+	type = process_arg(event, field, &token);
+
+	if (test_type_token(type, token, EVENT_DELIM, ","))
+		goto out_free;
+
+	arg->hex.field = field;
+
+	free_token(token);
+
+	field = alloc_arg();
+	type = process_arg(event, field, &token);
+
+	if (test_type_token(type, token, EVENT_DELIM, ")"))
+		goto out_free;
+
+	arg->hex.size = field;
+
+	free_token(token);
+	type = read_token_item(tok);
+	return type;
+
+ out_free:
+	free_arg(field);
+	free_token(token);
+	*tok = NULL;
+	return EVENT_ERROR;
+}
+
 static enum event_type
 process_dynamic_array(struct event_format *event, struct print_arg *arg, char **tok)
 {
@@ -2488,6 +2531,10 @@ process_function(struct event_format *event, struct print_arg *arg,
 		is_symbolic_field = 1;
 		return process_symbols(event, arg, tok);
 	}
+	if (strcmp(token, "__print_hex") == 0) {
+		free_token(token);
+		return process_hex(event, arg, tok);
+	}
 	if (strcmp(token, "__get_str") == 0) {
 		free_token(token);
 		return process_str(event, arg, tok);
@@ -2995,6 +3042,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 		break;
 	case PRINT_FLAGS:
 	case PRINT_SYMBOL:
+	case PRINT_HEX:
 		break;
 	case PRINT_TYPE:
 		val = eval_num_arg(data, size, event, arg->typecast.item);
@@ -3218,8 +3266,9 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 	unsigned long long val, fval;
 	unsigned long addr;
 	char *str;
+	unsigned char *hex;
 	int print;
-	int len;
+	int i, len;
 
 	switch (arg->type) {
 	case PRINT_NULL:
@@ -3284,6 +3333,23 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 			}
 		}
 		break;
+	case PRINT_HEX:
+		field = arg->hex.field->field.field;
+		if (!field) {
+			str = arg->hex.field->field.name;
+			field = pevent_find_any_field(event, str);
+			if (!field)
+				die("field %s not found", str);
+			arg->hex.field->field.field = field;
+		}
+		hex = data + field->offset;
+		len = eval_num_arg(data, size, event, arg->hex.size);
+		for (i = 0; i < len; i++) {
+			if (i)
+				trace_seq_putc(s, ' ');
+			trace_seq_printf(s, "%02x", hex[i]);
+		}
+		break;
 
 	case PRINT_TYPE:
 		break;
@@ -4294,6 +4360,13 @@ static void print_args(struct print_arg *args)
 		trace_seq_destroy(&s);
 		printf(")");
 		break;
+	case PRINT_HEX:
+		printf("__print_hex(");
+		print_args(args->hex.field);
+		printf(", ");
+		print_args(args->hex.size);
+		printf(")");
+		break;
 	case PRINT_STRING:
 	case PRINT_BSTRING:
 		printf("__get_str(%s)", args->string.string);
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index ac997bc7b592..5772ad8cb386 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -226,6 +226,11 @@ struct print_arg_symbol {
 	struct print_flag_sym	*symbols;
 };
 
+struct print_arg_hex {
+	struct print_arg	*field;
+	struct print_arg	*size;
+};
+
 struct print_arg_dynarray {
 	struct format_field	*field;
 	struct print_arg	*index;
@@ -253,6 +258,7 @@ enum print_arg_type {
 	PRINT_FIELD,
 	PRINT_FLAGS,
 	PRINT_SYMBOL,
+	PRINT_HEX,
 	PRINT_TYPE,
 	PRINT_STRING,
 	PRINT_BSTRING,
@@ -270,6 +276,7 @@ struct print_arg {
 		struct print_arg_typecast	typecast;
 		struct print_arg_flags		flags;
 		struct print_arg_symbol		symbol;
+		struct print_arg_hex		hex;
 		struct print_arg_func		func;
 		struct print_arg_string		string;
 		struct print_arg_op		op;
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index b3620fe12763..02dfa19a467f 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -209,6 +209,10 @@ static void define_event_symbols(struct event_format *event,
 		define_symbolic_values(args->symbol.symbols, ev_name,
 				       cur_field_name);
 		break;
+	case PRINT_HEX:
+		define_event_symbols(event, ev_name, args->hex.field);
+		define_event_symbols(event, ev_name, args->hex.size);
+		break;
 	case PRINT_BSTRING:
 	case PRINT_DYNAMIC_ARRAY:
 	case PRINT_STRING:
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index a8ca2f8179a9..ce4d1b0c3862 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -166,6 +166,10 @@ static void define_event_symbols(struct event_format *event,
 		define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name,
 			      cur_field_name);
 		break;
+	case PRINT_HEX:
+		define_event_symbols(event, ev_name, args->hex.field);
+		define_event_symbols(event, ev_name, args->hex.size);
+		break;
 	case PRINT_STRING:
 		break;
 	case PRINT_TYPE:
-- 
cgit v1.2.3


From 50d8f9e393c5a1a8864fda75e3a9f2b800497a61 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 11 Jun 2012 15:28:53 +0900
Subject: tools lib traceevent: Replace malloc_or_die to plain malloc in
 alloc_event()

Because the only caller of the alloc_event() (pevent_parse_event) checks
return value properly, it can be changed to use plain malloc.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1339396133-9839-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index b1abd39923d6..83f0a8add177 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -616,7 +616,9 @@ static struct event_format *alloc_event(void)
 {
 	struct event_format *event;
 
-	event = malloc_or_die(sizeof(*event));
+	event = malloc(sizeof(*event));
+	if (!event)
+		return NULL;
 	memset(event, 0, sizeof(*event));
 
 	return event;
-- 
cgit v1.2.3


From 7582732f57e547929d4c65ad8e38f3446e0538d8 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 29 Jun 2012 09:22:54 +0200
Subject: perf tools: Fix hw breakpoint's type modifier parsing

Fixing the hw breakpoint's type modifier parsing to allow all possible
combinations of 'rwx' characters.

Adding automated tests to the parsing test suite.

Reported-by: Jovi Zhang <bookjovi@gmail.com>
Original-patch-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jovi Zhang <bookjovi@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120629072254.GA940@krava.brq.redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events-test.c | 37 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/parse-events.c      | 16 +++++++++++++---
 tools/perf/util/parse-events.l      |  2 +-
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c
index a0f61a2a6835..de81fe1f9329 100644
--- a/tools/perf/util/parse-events-test.c
+++ b/tools/perf/util/parse-events-test.c
@@ -181,6 +181,22 @@ static int test__checkevent_breakpoint_w(struct perf_evlist *evlist)
 	return 0;
 }
 
+static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = list_entry(evlist->entries.next,
+					      struct perf_evsel, node);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type",
+			PERF_TYPE_BREAKPOINT == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong bp_type",
+		(HW_BREAKPOINT_R|HW_BREAKPOINT_W) == evsel->attr.bp_type);
+	TEST_ASSERT_VAL("wrong bp_len",
+			HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
+	return 0;
+}
+
 static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel = list_entry(evlist->entries.next,
@@ -352,6 +368,19 @@ static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist)
 	return test__checkevent_breakpoint_w(evlist);
 }
 
+static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = list_entry(evlist->entries.next,
+					      struct perf_evsel, node);
+
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+	return test__checkevent_breakpoint_rw(evlist);
+}
+
 static int test__checkevent_pmu(struct perf_evlist *evlist)
 {
 
@@ -585,6 +614,14 @@ static struct test__event_st test__events[] = {
 		.name  = "instructions:H",
 		.check = test__checkevent_exclude_guest_modifier,
 	},
+	[26] = {
+		.name  = "mem:0:rw",
+		.check = test__checkevent_breakpoint_rw,
+	},
+	[27] = {
+		.name  = "mem:0:rw:kp",
+		.check = test__checkevent_breakpoint_rw_modifier,
+	},
 };
 
 #define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st))
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 0cc27da30ddb..7ae76af709f1 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -383,21 +383,31 @@ parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
 		if (!type || !type[i])
 			break;
 
+#define CHECK_SET_TYPE(bit)		\
+do {					\
+	if (attr->bp_type & bit)	\
+		return -EINVAL;		\
+	else				\
+		attr->bp_type |= bit;	\
+} while (0)
+
 		switch (type[i]) {
 		case 'r':
-			attr->bp_type |= HW_BREAKPOINT_R;
+			CHECK_SET_TYPE(HW_BREAKPOINT_R);
 			break;
 		case 'w':
-			attr->bp_type |= HW_BREAKPOINT_W;
+			CHECK_SET_TYPE(HW_BREAKPOINT_W);
 			break;
 		case 'x':
-			attr->bp_type |= HW_BREAKPOINT_X;
+			CHECK_SET_TYPE(HW_BREAKPOINT_X);
 			break;
 		default:
 			return -EINVAL;
 		}
 	}
 
+#undef CHECK_SET_TYPE
+
 	if (!attr->bp_type) /* Default */
 		attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
 
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 488362e14133..a06689474210 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -76,7 +76,7 @@ num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
 name		[a-zA-Z_*?][a-zA-Z0-9_*?]*
 modifier_event	[ukhpGH]{1,8}
-modifier_bp	[rwx]
+modifier_bp	[rwx]{1,3}
 
 %%
 
-- 
cgit v1.2.3


From 287e74aa3db097469bdca401f33f00ef20dc710d Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 28 Jun 2012 23:18:49 +0200
Subject: perf evsel: Handle hw breakpoints event names in perf_evsel__name()

Adding hw breakpoint events hook in the perf_evsel__name function, to
display event names properly all over the perf tools.

Updated hw breakpoints events tests.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jovi Zhang <bookjovi@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340918329-3012-3-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c             | 30 ++++++++++++++++++++++++++++++
 tools/perf/util/parse-events-test.c | 10 ++++++++++
 tools/perf/util/parse-events.c      |  4 +---
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 3d1f6968f175..e81771364867 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -15,6 +15,7 @@
 #include "cpumap.h"
 #include "thread_map.h"
 #include "target.h"
+#include "../../../include/linux/hw_breakpoint.h"
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
@@ -152,6 +153,31 @@ static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size)
 	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
+static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
+{
+	int r;
+
+	r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
+
+	if (type & HW_BREAKPOINT_R)
+		r += scnprintf(bf + r, size - r, "r");
+
+	if (type & HW_BREAKPOINT_W)
+		r += scnprintf(bf + r, size - r, "w");
+
+	if (type & HW_BREAKPOINT_X)
+		r += scnprintf(bf + r, size - r, "x");
+
+	return r;
+}
+
+static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
+	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
 const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_EVSEL__MAX_ALIASES] = {
  { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
@@ -285,6 +311,10 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
 		scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
 		break;
 
+	case PERF_TYPE_BREAKPOINT:
+		perf_evsel__bp_name(evsel, bf, sizeof(bf));
+		break;
+
 	default:
 		scnprintf(bf, sizeof(bf), "%s", "unknown attr type");
 		break;
diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c
index de81fe1f9329..dd0c306a0698 100644
--- a/tools/perf/util/parse-events-test.c
+++ b/tools/perf/util/parse-events-test.c
@@ -325,6 +325,8 @@ static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "mem:0x0:rw:u"));
 
 	return test__checkevent_breakpoint(evlist);
 }
@@ -338,6 +340,8 @@ static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "mem:0x0:x:k"));
 
 	return test__checkevent_breakpoint_x(evlist);
 }
@@ -351,6 +355,8 @@ static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "mem:0x0:r:hp"));
 
 	return test__checkevent_breakpoint_r(evlist);
 }
@@ -364,6 +370,8 @@ static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "mem:0x0:w:up"));
 
 	return test__checkevent_breakpoint_w(evlist);
 }
@@ -377,6 +385,8 @@ static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "mem:0x0:rw:kp"));
 
 	return test__checkevent_breakpoint_rw(evlist);
 }
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 7ae76af709f1..1dc44dc69133 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -418,7 +418,6 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx,
 				void *ptr, char *type)
 {
 	struct perf_event_attr attr;
-	char name[MAX_NAME_LEN];
 
 	memset(&attr, 0, sizeof(attr));
 	attr.bp_addr = (unsigned long) ptr;
@@ -437,8 +436,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx,
 
 	attr.type = PERF_TYPE_BREAKPOINT;
 
-	snprintf(name, MAX_NAME_LEN, "mem:%p:%s", ptr, type ? type : "rw");
-	return add_event(list, idx, &attr, name);
+	return add_event(list, idx, &attr, NULL);
 }
 
 static int config_term(struct perf_event_attr *attr,
-- 
cgit v1.2.3


From 9bc8f9fe2c6e3778202c76ef85ef291567c00cb8 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 14 Jun 2012 22:38:37 +0200
Subject: perf tools: Fix generation of pmu list

The internal pmu list was never used. With each perf_pmu__find() call
the pmu structure was created new by parsing sysfs. Beside this it
caused memory leaks. We now keep all pmus by adding them to the list.

Also, pmu_lookup() should return pmus that do not expose the format
specifier in sysfs.

We need a valid internal pmu list in a later patch to iterate over all
pmus that exist in the system.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1339706321-8802-3-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 74d0948ec368..67715a42cd6d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -72,7 +72,7 @@ static int pmu_format(char *name, struct list_head *format)
 		 "%s/bus/event_source/devices/%s/format", sysfs, name);
 
 	if (stat(path, &st) < 0)
-		return -1;
+		return 0;	/* no error if format does not exist */
 
 	if (pmu_format_parse(path, format))
 		return -1;
@@ -252,6 +252,7 @@ static struct perf_pmu *pmu_lookup(char *name)
 	list_splice(&aliases, &pmu->aliases);
 	pmu->name = strdup(name);
 	pmu->type = type;
+	list_add_tail(&pmu->list, &pmus);
 	return pmu;
 }
 
-- 
cgit v1.2.3


From 1388d715dd7d0f494c93dfdef6ab26719218b868 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 19 Jun 2012 17:48:05 +0200
Subject: perf symbols: Add '.note' check into search for NOTE section

Adding '.note' section name to be check when looking for notes section.
The '.note' name is used by kernel VDSO.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1340120894-9465-15-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 994f4ffdcd05..50958bbeb26a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1478,14 +1478,31 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size)
 		goto out;
 	}
 
-	sec = elf_section_by_name(elf, &ehdr, &shdr,
-				  ".note.gnu.build-id", NULL);
-	if (sec == NULL) {
+	/*
+	 * Check following sections for notes:
+	 *   '.note.gnu.build-id'
+	 *   '.notes'
+	 *   '.note' (VDSO specific)
+	 */
+	do {
+		sec = elf_section_by_name(elf, &ehdr, &shdr,
+					  ".note.gnu.build-id", NULL);
+		if (sec)
+			break;
+
 		sec = elf_section_by_name(elf, &ehdr, &shdr,
 					  ".notes", NULL);
-		if (sec == NULL)
-			goto out;
-	}
+		if (sec)
+			break;
+
+		sec = elf_section_by_name(elf, &ehdr, &shdr,
+					  ".note", NULL);
+		if (sec)
+			break;
+
+		return err;
+
+	} while (0);
 
 	data = elf_getdata(sec, NULL);
 	if (data == NULL)
-- 
cgit v1.2.3


From 339ce005091b156c2af4c016c6ba9c1f87cd826a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 19 Jun 2012 17:48:11 +0200
Subject: perf tools: Adding round_up/round_down macros

Adding round_up and round_down macros. They will be used in upcoming
patches.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1340120894-9465-21-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/include/linux/kernel.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h
index 1eb804fd3fbf..b6842c1d02a8 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/perf/util/include/linux/kernel.h
@@ -108,4 +108,14 @@ int eprintf(int level,
 #define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
 
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
 #endif
-- 
cgit v1.2.3


From 954e482bde20b0e208fd4d34ef26e10afd194600 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Thu, 24 May 2012 18:19:45 -0700
Subject: x86/copy_user_generic: Optimize copy_user_generic with CPU erms
 feature

According to Intel 64 and IA-32 SDM and Optimization Reference Manual, beginning
with Ivybridge, REG string operation using MOVSB and STOSB can provide both
flexible and high-performance REG string operations in cases like memory copy.
Enhancement availability is indicated by CPUID.7.0.EBX[9] (Enhanced REP MOVSB/
STOSB).

If CPU erms feature is detected, patch copy_user_generic with enhanced fast
string version of copy_user_generic.

A few new macros are defined to reduce duplicate code in ALTERNATIVE and
ALTERNATIVE_2.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1337908785-14015-1-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/alternative.h | 74 ++++++++++++++++++++++++++++++--------
 arch/x86/include/asm/uaccess_64.h  | 11 +++++-
 arch/x86/kernel/x8664_ksyms_64.c   |  1 +
 3 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 49331bedc158..70780689599a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -75,23 +75,54 @@ static inline int alternatives_text_reserved(void *start, void *end)
 }
 #endif	/* CONFIG_SMP */
 
+#define OLDINSTR(oldinstr)	"661:\n\t" oldinstr "\n662:\n"
+
+#define b_replacement(number)	"663"#number
+#define e_replacement(number)	"664"#number
+
+#define alt_slen "662b-661b"
+#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+
+#define ALTINSTR_ENTRY(feature, number)					      \
+	" .long 661b - .\n"				/* label           */ \
+	" .long " b_replacement(number)"f - .\n"	/* new instruction */ \
+	" .word " __stringify(feature) "\n"		/* feature bit     */ \
+	" .byte " alt_slen "\n"				/* source len      */ \
+	" .byte " alt_rlen(number) "\n"			/* replacement len */
+
+#define DISCARD_ENTRY(number)				/* rlen <= slen */    \
+	" .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+
+#define ALTINSTR_REPLACEMENT(newinstr, feature, number)	/* replacement */     \
+	b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature)			\
-									\
-      "661:\n\t" oldinstr "\n662:\n"					\
-      ".section .altinstructions,\"a\"\n"				\
-      "	 .long 661b - .\n"			/* label           */	\
-      "	 .long 663f - .\n"			/* new instruction */	\
-      "	 .word " __stringify(feature) "\n"	/* feature bit     */	\
-      "	 .byte 662b-661b\n"			/* sourcelen       */	\
-      "	 .byte 664f-663f\n"			/* replacementlen  */	\
-      ".previous\n"							\
-      ".section .discard,\"aw\",@progbits\n"				\
-      "	 .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */	\
-      ".previous\n"							\
-      ".section .altinstr_replacement, \"ax\"\n"			\
-      "663:\n\t" newinstr "\n664:\n"		/* replacement     */	\
-      ".previous"
+	OLDINSTR(oldinstr)						\
+	".section .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(feature, 1)					\
+	".previous\n"							\
+	".section .discard,\"aw\",@progbits\n"				\
+	DISCARD_ENTRY(1)						\
+	".previous\n"							\
+	".section .altinstr_replacement, \"ax\"\n"			\
+	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
+	".previous"
+
+#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+	OLDINSTR(oldinstr)						\
+	".section .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(feature1, 1)					\
+	ALTINSTR_ENTRY(feature2, 2)					\
+	".previous\n"							\
+	".section .discard,\"aw\",@progbits\n"				\
+	DISCARD_ENTRY(1)						\
+	DISCARD_ENTRY(2)						\
+	".previous\n"							\
+	".section .altinstr_replacement, \"ax\"\n"			\
+	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
+	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
+	".previous"
 
 /*
  * This must be included *after* the definition of ALTERNATIVE due to
@@ -139,6 +170,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
 		: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
 
+/*
+ * Like alternative_call, but there are two features and respective functions.
+ * If CPU has feature2, function2 is used.
+ * Otherwise, if CPU has feature1, function1 is used.
+ * Otherwise, old function is used.
+ */
+#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
+			   output, input...)				      \
+	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
+		"call %P[new2]", feature2)				      \
+		: output : [old] "i" (oldfunc), [new1] "i" (newfunc1),	      \
+		[new2] "i" (newfunc2), ## input)
+
 /*
  * use this macro(s) if you need more than one output parameter
  * in alternative_io
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 8e796fbbf9c6..d8def8b3dba0 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -17,6 +17,8 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
+copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
+__must_check unsigned long
 copy_user_generic_string(void *to, const void *from, unsigned len);
 __must_check unsigned long
 copy_user_generic_unrolled(void *to, const void *from, unsigned len);
@@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len)
 {
 	unsigned ret;
 
-	alternative_call(copy_user_generic_unrolled,
+	/*
+	 * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
+	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
+	 * Otherwise, use copy_user_generic_unrolled.
+	 */
+	alternative_call_2(copy_user_generic_unrolled,
 			 copy_user_generic_string,
 			 X86_FEATURE_REP_GOOD,
+			 copy_user_enhanced_fast_string,
+			 X86_FEATURE_ERMS,
 			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
 				     "=d" (len)),
 			 "1" (to), "2" (from), "3" (len)
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 9796c2f3d074..6020f6f5927c 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -28,6 +28,7 @@ EXPORT_SYMBOL(__put_user_8);
 
 EXPORT_SYMBOL(copy_user_generic_string);
 EXPORT_SYMBOL(copy_user_generic_unrolled);
+EXPORT_SYMBOL(copy_user_enhanced_fast_string);
 EXPORT_SYMBOL(__copy_user_nocache);
 EXPORT_SYMBOL(_copy_from_user);
 EXPORT_SYMBOL(_copy_to_user);
-- 
cgit v1.2.3


From c9fc3f778a6a215ace14ee556067c73982b6d40f Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Thu, 21 Jun 2012 14:07:16 +0200
Subject: x86, microcode: Sanitize per-cpu microcode reloading interface

Microcode reloading in a per-core manner is a very bad idea for both
major x86 vendors. And the thing is, we have such interface with which
we can end up with different microcode versions applied on different
cores of an otherwise homogeneous wrt (family,model,stepping) system.

So turn off the possibility of doing that per core and allow it only
system-wide.

This is a minimal fix which we'd like to see in stable too thus the
more-or-less arbitrary decision to allow system-wide reloading only on
the BSP:

$ echo 1 > /sys/devices/system/cpu/cpu0/microcode/reload
...

and disable the interface on the other cores:

$ echo 1 > /sys/devices/system/cpu/cpu23/microcode/reload
-bash: echo: write error: Invalid argument

Also, allowing the reload only from one CPU (the BSP in
that case) doesn't allow the reload procedure to degenerate
into an O(n^2) deal when triggering reloads from all
/sys/devices/system/cpu/cpuX/microcode/reload sysfs nodes
simultaneously.

A more generic fix will follow.

Cc: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1340280437-7718-2-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: <stable@vger.kernel.org>
---
 arch/x86/kernel/microcode_core.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fbdfc6917180..24b852b61be3 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -298,19 +298,31 @@ static ssize_t reload_store(struct device *dev,
 			    const char *buf, size_t size)
 {
 	unsigned long val;
-	int cpu = dev->id;
-	ssize_t ret = 0;
+	int cpu;
+	ssize_t ret = 0, tmp_ret;
+
+	/* allow reload only from the BSP */
+	if (boot_cpu_data.cpu_index != dev->id)
+		return -EINVAL;
 
 	ret = kstrtoul(buf, 0, &val);
 	if (ret)
 		return ret;
 
-	if (val == 1) {
-		get_online_cpus();
-		if (cpu_online(cpu))
-			ret = reload_for_cpu(cpu);
-		put_online_cpus();
+	if (val != 1)
+		return size;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		tmp_ret = reload_for_cpu(cpu);
+		if (tmp_ret != 0)
+			pr_warn("Error reloading microcode on CPU %d\n", cpu);
+
+		/* save retval of the first encountered reload error */
+		if (!ret)
+			ret = tmp_ret;
 	}
+	put_online_cpus();
 
 	if (!ret)
 		ret = size;
-- 
cgit v1.2.3


From 3d8986bc7f309483ee09c7a02888bab09072c19b Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Thu, 21 Jun 2012 14:07:17 +0200
Subject: x86, microcode: Make reload interface per system

The reload interface should be per-system so that a full system ucode
reload happens (on each core) when doing

echo 1 > /sys/devices/system/cpu/microcode/reload

Move it to the cpu subsys directory instead of it being per-cpu.

Cc: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1340280437-7718-3-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/microcode_core.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 24b852b61be3..947e4c64b1d8 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -301,10 +301,6 @@ static ssize_t reload_store(struct device *dev,
 	int cpu;
 	ssize_t ret = 0, tmp_ret;
 
-	/* allow reload only from the BSP */
-	if (boot_cpu_data.cpu_index != dev->id)
-		return -EINVAL;
-
 	ret = kstrtoul(buf, 0, &val);
 	if (ret)
 		return ret;
@@ -351,7 +347,6 @@ static DEVICE_ATTR(version, 0400, version_show, NULL);
 static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
 
 static struct attribute *mc_default_attrs[] = {
-	&dev_attr_reload.attr,
 	&dev_attr_version.attr,
 	&dev_attr_processor_flags.attr,
 	NULL
@@ -528,6 +523,16 @@ static const struct x86_cpu_id microcode_id[] = {
 MODULE_DEVICE_TABLE(x86cpu, microcode_id);
 #endif
 
+static struct attribute *cpu_root_microcode_attrs[] = {
+	&dev_attr_reload.attr,
+	NULL
+};
+
+static struct attribute_group cpu_root_microcode_group = {
+	.name  = "microcode",
+	.attrs = cpu_root_microcode_attrs,
+};
+
 static int __init microcode_init(void)
 {
 	struct cpuinfo_x86 *c = &cpu_data(0);
@@ -559,9 +564,17 @@ static int __init microcode_init(void)
 	if (error)
 		goto out_pdev;
 
+	error = sysfs_create_group(&cpu_subsys.dev_root->kobj,
+				   &cpu_root_microcode_group);
+
+	if (error) {
+		pr_err("Error creating microcode group!\n");
+		goto out_driver;
+	}
+
 	error = microcode_dev_init();
 	if (error)
-		goto out_driver;
+		goto out_ucode_group;
 
 	register_syscore_ops(&mc_syscore_ops);
 	register_hotcpu_notifier(&mc_cpu_notifier);
@@ -571,7 +584,11 @@ static int __init microcode_init(void)
 
 	return 0;
 
-out_driver:
+ out_ucode_group:
+	sysfs_remove_group(&cpu_subsys.dev_root->kobj,
+			   &cpu_root_microcode_group);
+
+ out_driver:
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
 
@@ -580,7 +597,7 @@ out_driver:
 	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
 
-out_pdev:
+ out_pdev:
 	platform_device_unregister(microcode_pdev);
 	return error;
 
@@ -596,6 +613,9 @@ static void __exit microcode_exit(void)
 	unregister_hotcpu_notifier(&mc_cpu_notifier);
 	unregister_syscore_ops(&mc_syscore_ops);
 
+	sysfs_remove_group(&cpu_subsys.dev_root->kobj,
+			   &cpu_root_microcode_group);
+
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
 
-- 
cgit v1.2.3


From 17d7a1123f0f6d532830152564cc812cc73db2f3 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <h.mitake@gmail.com>
Date: Mon, 2 Jul 2012 22:46:17 +0900
Subject: perf bench: Fix confused variable namings and descriptions in mem
 subsystem

As Namhyung Kim pointed, there are confused namings and descriptions of words
"cycle" and "clock" in mem-memset.c and mem-memcpy.c.

With the option "-c" (or "--clock", now renamed as "--cycle"), mem subsystem
measures cost of memset() and memcpy() with cpu-cycles event.

But current mem subsystem source code contains lots of confused variable
namings and descriptions with "clock" (e.g. the variable use_clock). This is a
very bad style because there is another software event named "cpu-clock". This
patch replaces wrong usage of "clock" to "cycle".

v2: modified Documentation/perf-bench.txt for the descriptions of
--cycle option

Signed-off-by: Hitoshi Mitake <h.mitake@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1341236777-18457-1-git-send-email-h.mitake@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-bench.txt |  4 +-
 tools/perf/bench/mem-memcpy.c           | 80 ++++++++++++++++-----------------
 tools/perf/bench/mem-memset.c           | 80 ++++++++++++++++-----------------
 3 files changed, 82 insertions(+), 82 deletions(-)

diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index f3c716a4cad3..7065cd6fbdfc 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -144,7 +144,7 @@ On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported.
 Repeat memcpy invocation this number of times.
 
 -c::
---clock::
+--cycle::
 Use perf's cpu-cycles event instead of gettimeofday syscall.
 
 -o::
@@ -176,7 +176,7 @@ On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported.
 Repeat memset invocation this number of times.
 
 -c::
---clock::
+--cycle::
 Use perf's cpu-cycles event instead of gettimeofday syscall.
 
 -o::
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index d990365cafa0..02dad5d3359b 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -24,8 +24,8 @@
 static const char	*length_str	= "1MB";
 static const char	*routine	= "default";
 static int		iterations	= 1;
-static bool		use_clock;
-static int		clock_fd;
+static bool		use_cycle;
+static int		cycle_fd;
 static bool		only_prefault;
 static bool		no_prefault;
 
@@ -37,7 +37,7 @@ static const struct option options[] = {
 		    "Specify routine to copy"),
 	OPT_INTEGER('i', "iterations", &iterations,
 		    "repeat memcpy() invocation this number of times"),
-	OPT_BOOLEAN('c', "clock", &use_clock,
+	OPT_BOOLEAN('c', "cycle", &use_cycle,
 		    "Use cycles event instead of gettimeofday() for measuring"),
 	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
 		    "Show only the result with page faults before memcpy()"),
@@ -76,27 +76,27 @@ static const char * const bench_mem_memcpy_usage[] = {
 	NULL
 };
 
-static struct perf_event_attr clock_attr = {
+static struct perf_event_attr cycle_attr = {
 	.type		= PERF_TYPE_HARDWARE,
 	.config		= PERF_COUNT_HW_CPU_CYCLES
 };
 
-static void init_clock(void)
+static void init_cycle(void)
 {
-	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0);
 
-	if (clock_fd < 0 && errno == ENOSYS)
+	if (cycle_fd < 0 && errno == ENOSYS)
 		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 	else
-		BUG_ON(clock_fd < 0);
+		BUG_ON(cycle_fd < 0);
 }
 
-static u64 get_clock(void)
+static u64 get_cycle(void)
 {
 	int ret;
 	u64 clk;
 
-	ret = read(clock_fd, &clk, sizeof(u64));
+	ret = read(cycle_fd, &clk, sizeof(u64));
 	BUG_ON(ret != sizeof(u64));
 
 	return clk;
@@ -119,9 +119,9 @@ static void alloc_mem(void **dst, void **src, size_t length)
 		die("memory allocation failed - maybe length is too large?\n");
 }
 
-static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
+static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
 {
-	u64 clock_start = 0ULL, clock_end = 0ULL;
+	u64 cycle_start = 0ULL, cycle_end = 0ULL;
 	void *src = NULL, *dst = NULL;
 	int i;
 
@@ -130,14 +130,14 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
 	if (prefault)
 		fn(dst, src, len);
 
-	clock_start = get_clock();
+	cycle_start = get_cycle();
 	for (i = 0; i < iterations; ++i)
 		fn(dst, src, len);
-	clock_end = get_clock();
+	cycle_end = get_cycle();
 
 	free(src);
 	free(dst);
-	return clock_end - clock_start;
+	return cycle_end - cycle_start;
 }
 
 static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
@@ -182,17 +182,17 @@ int bench_mem_memcpy(int argc, const char **argv,
 	int i;
 	size_t len;
 	double result_bps[2];
-	u64 result_clock[2];
+	u64 result_cycle[2];
 
 	argc = parse_options(argc, argv, options,
 			     bench_mem_memcpy_usage, 0);
 
-	if (use_clock)
-		init_clock();
+	if (use_cycle)
+		init_cycle();
 
 	len = (size_t)perf_atoll((char *)length_str);
 
-	result_clock[0] = result_clock[1] = 0ULL;
+	result_cycle[0] = result_cycle[1] = 0ULL;
 	result_bps[0] = result_bps[1] = 0.0;
 
 	if ((s64)len <= 0) {
@@ -223,11 +223,11 @@ int bench_mem_memcpy(int argc, const char **argv,
 
 	if (!only_prefault && !no_prefault) {
 		/* show both of results */
-		if (use_clock) {
-			result_clock[0] =
-				do_memcpy_clock(routines[i].fn, len, false);
-			result_clock[1] =
-				do_memcpy_clock(routines[i].fn, len, true);
+		if (use_cycle) {
+			result_cycle[0] =
+				do_memcpy_cycle(routines[i].fn, len, false);
+			result_cycle[1] =
+				do_memcpy_cycle(routines[i].fn, len, true);
 		} else {
 			result_bps[0] =
 				do_memcpy_gettimeofday(routines[i].fn,
@@ -237,9 +237,9 @@ int bench_mem_memcpy(int argc, const char **argv,
 						len, true);
 		}
 	} else {
-		if (use_clock) {
-			result_clock[pf] =
-				do_memcpy_clock(routines[i].fn,
+		if (use_cycle) {
+			result_cycle[pf] =
+				do_memcpy_cycle(routines[i].fn,
 						len, only_prefault);
 		} else {
 			result_bps[pf] =
@@ -251,12 +251,12 @@ int bench_mem_memcpy(int argc, const char **argv,
 	switch (bench_format) {
 	case BENCH_FORMAT_DEFAULT:
 		if (!only_prefault && !no_prefault) {
-			if (use_clock) {
-				printf(" %14lf Clock/Byte\n",
-					(double)result_clock[0]
+			if (use_cycle) {
+				printf(" %14lf Cycle/Byte\n",
+					(double)result_cycle[0]
 					/ (double)len);
-				printf(" %14lf Clock/Byte (with prefault)\n",
-					(double)result_clock[1]
+				printf(" %14lf Cycle/Byte (with prefault)\n",
+					(double)result_cycle[1]
 					/ (double)len);
 			} else {
 				print_bps(result_bps[0]);
@@ -265,9 +265,9 @@ int bench_mem_memcpy(int argc, const char **argv,
 				printf(" (with prefault)\n");
 			}
 		} else {
-			if (use_clock) {
-				printf(" %14lf Clock/Byte",
-					(double)result_clock[pf]
+			if (use_cycle) {
+				printf(" %14lf Cycle/Byte",
+					(double)result_cycle[pf]
 					/ (double)len);
 			} else
 				print_bps(result_bps[pf]);
@@ -277,17 +277,17 @@ int bench_mem_memcpy(int argc, const char **argv,
 		break;
 	case BENCH_FORMAT_SIMPLE:
 		if (!only_prefault && !no_prefault) {
-			if (use_clock) {
+			if (use_cycle) {
 				printf("%lf %lf\n",
-					(double)result_clock[0] / (double)len,
-					(double)result_clock[1] / (double)len);
+					(double)result_cycle[0] / (double)len,
+					(double)result_cycle[1] / (double)len);
 			} else {
 				printf("%lf %lf\n",
 					result_bps[0], result_bps[1]);
 			}
 		} else {
-			if (use_clock) {
-				printf("%lf\n", (double)result_clock[pf]
+			if (use_cycle) {
+				printf("%lf\n", (double)result_cycle[pf]
 					/ (double)len);
 			} else
 				printf("%lf\n", result_bps[pf]);
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
index bf0d5f552017..350cc9557265 100644
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -24,8 +24,8 @@
 static const char	*length_str	= "1MB";
 static const char	*routine	= "default";
 static int		iterations	= 1;
-static bool		use_clock;
-static int		clock_fd;
+static bool		use_cycle;
+static int		cycle_fd;
 static bool		only_prefault;
 static bool		no_prefault;
 
@@ -37,7 +37,7 @@ static const struct option options[] = {
 		    "Specify routine to set"),
 	OPT_INTEGER('i', "iterations", &iterations,
 		    "repeat memset() invocation this number of times"),
-	OPT_BOOLEAN('c', "clock", &use_clock,
+	OPT_BOOLEAN('c', "cycle", &use_cycle,
 		    "Use cycles event instead of gettimeofday() for measuring"),
 	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
 		    "Show only the result with page faults before memset()"),
@@ -76,27 +76,27 @@ static const char * const bench_mem_memset_usage[] = {
 	NULL
 };
 
-static struct perf_event_attr clock_attr = {
+static struct perf_event_attr cycle_attr = {
 	.type		= PERF_TYPE_HARDWARE,
 	.config		= PERF_COUNT_HW_CPU_CYCLES
 };
 
-static void init_clock(void)
+static void init_cycle(void)
 {
-	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0);
 
-	if (clock_fd < 0 && errno == ENOSYS)
+	if (cycle_fd < 0 && errno == ENOSYS)
 		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
 	else
-		BUG_ON(clock_fd < 0);
+		BUG_ON(cycle_fd < 0);
 }
 
-static u64 get_clock(void)
+static u64 get_cycle(void)
 {
 	int ret;
 	u64 clk;
 
-	ret = read(clock_fd, &clk, sizeof(u64));
+	ret = read(cycle_fd, &clk, sizeof(u64));
 	BUG_ON(ret != sizeof(u64));
 
 	return clk;
@@ -115,9 +115,9 @@ static void alloc_mem(void **dst, size_t length)
 		die("memory allocation failed - maybe length is too large?\n");
 }
 
-static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)
+static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault)
 {
-	u64 clock_start = 0ULL, clock_end = 0ULL;
+	u64 cycle_start = 0ULL, cycle_end = 0ULL;
 	void *dst = NULL;
 	int i;
 
@@ -126,13 +126,13 @@ static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)
 	if (prefault)
 		fn(dst, -1, len);
 
-	clock_start = get_clock();
+	cycle_start = get_cycle();
 	for (i = 0; i < iterations; ++i)
 		fn(dst, i, len);
-	clock_end = get_clock();
+	cycle_end = get_cycle();
 
 	free(dst);
-	return clock_end - clock_start;
+	return cycle_end - cycle_start;
 }
 
 static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
@@ -176,17 +176,17 @@ int bench_mem_memset(int argc, const char **argv,
 	int i;
 	size_t len;
 	double result_bps[2];
-	u64 result_clock[2];
+	u64 result_cycle[2];
 
 	argc = parse_options(argc, argv, options,
 			     bench_mem_memset_usage, 0);
 
-	if (use_clock)
-		init_clock();
+	if (use_cycle)
+		init_cycle();
 
 	len = (size_t)perf_atoll((char *)length_str);
 
-	result_clock[0] = result_clock[1] = 0ULL;
+	result_cycle[0] = result_cycle[1] = 0ULL;
 	result_bps[0] = result_bps[1] = 0.0;
 
 	if ((s64)len <= 0) {
@@ -217,11 +217,11 @@ int bench_mem_memset(int argc, const char **argv,
 
 	if (!only_prefault && !no_prefault) {
 		/* show both of results */
-		if (use_clock) {
-			result_clock[0] =
-				do_memset_clock(routines[i].fn, len, false);
-			result_clock[1] =
-				do_memset_clock(routines[i].fn, len, true);
+		if (use_cycle) {
+			result_cycle[0] =
+				do_memset_cycle(routines[i].fn, len, false);
+			result_cycle[1] =
+				do_memset_cycle(routines[i].fn, len, true);
 		} else {
 			result_bps[0] =
 				do_memset_gettimeofday(routines[i].fn,
@@ -231,9 +231,9 @@ int bench_mem_memset(int argc, const char **argv,
 						len, true);
 		}
 	} else {
-		if (use_clock) {
-			result_clock[pf] =
-				do_memset_clock(routines[i].fn,
+		if (use_cycle) {
+			result_cycle[pf] =
+				do_memset_cycle(routines[i].fn,
 						len, only_prefault);
 		} else {
 			result_bps[pf] =
@@ -245,12 +245,12 @@ int bench_mem_memset(int argc, const char **argv,
 	switch (bench_format) {
 	case BENCH_FORMAT_DEFAULT:
 		if (!only_prefault && !no_prefault) {
-			if (use_clock) {
-				printf(" %14lf Clock/Byte\n",
-					(double)result_clock[0]
+			if (use_cycle) {
+				printf(" %14lf Cycle/Byte\n",
+					(double)result_cycle[0]
 					/ (double)len);
-				printf(" %14lf Clock/Byte (with prefault)\n ",
-					(double)result_clock[1]
+				printf(" %14lf Cycle/Byte (with prefault)\n ",
+					(double)result_cycle[1]
 					/ (double)len);
 			} else {
 				print_bps(result_bps[0]);
@@ -259,9 +259,9 @@ int bench_mem_memset(int argc, const char **argv,
 				printf(" (with prefault)\n");
 			}
 		} else {
-			if (use_clock) {
-				printf(" %14lf Clock/Byte",
-					(double)result_clock[pf]
+			if (use_cycle) {
+				printf(" %14lf Cycle/Byte",
+					(double)result_cycle[pf]
 					/ (double)len);
 			} else
 				print_bps(result_bps[pf]);
@@ -271,17 +271,17 @@ int bench_mem_memset(int argc, const char **argv,
 		break;
 	case BENCH_FORMAT_SIMPLE:
 		if (!only_prefault && !no_prefault) {
-			if (use_clock) {
+			if (use_cycle) {
 				printf("%lf %lf\n",
-					(double)result_clock[0] / (double)len,
-					(double)result_clock[1] / (double)len);
+					(double)result_cycle[0] / (double)len,
+					(double)result_cycle[1] / (double)len);
 			} else {
 				printf("%lf %lf\n",
 					result_bps[0], result_bps[1]);
 			}
 		} else {
-			if (use_clock) {
-				printf("%lf\n", (double)result_clock[pf]
+			if (use_cycle) {
+				printf("%lf\n", (double)result_cycle[pf]
 					/ (double)len);
 			} else
 				printf("%lf\n", result_bps[pf]);
-- 
cgit v1.2.3


From f885b7f2b2de70be266d2cecc476f773a1e2ca5d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 23 Apr 2012 15:52:53 -0700
Subject: rcu: Control RCU_FANOUT_LEAF from boot-time parameter

Although making RCU_FANOUT_LEAF a kernel configuration parameter rather
than a fixed constant makes it easier for people to decrease cache-miss
overhead for large systems, it is of little help for people who must
run a single pre-built kernel binary.

This commit therefore allows the value of RCU_FANOUT_LEAF to be
increased (but not decreased!) via a boot-time parameter named
rcutree.rcu_fanout_leaf.

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/kernel-parameters.txt |  5 ++
 kernel/rcutree.c                    | 97 +++++++++++++++++++++++++++++++------
 kernel/rcutree.h                    | 23 +++++----
 kernel/rcutree_plugin.h             |  2 +
 kernel/rcutree_trace.c              |  2 +-
 5 files changed, 104 insertions(+), 25 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a92c5ebf373e..12783fa833c3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Set maximum number of finished RCU callbacks to process
 			in one batch.
 
+	rcutree.fanout_leaf=	[KNL,BOOT]
+			Increase the number of CPUs assigned to each
+			leaf rcu_node structure.  Useful for very large
+			systems.
+
 	rcutree.qhimark=	[KNL,BOOT]
 			Set threshold of queued
 			RCU callbacks over which batch limiting is disabled.
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4b97bba7396e..a4c592b66e10 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -60,17 +60,10 @@
 
 /* Data structures. */
 
-static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
+static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 
 #define RCU_STATE_INITIALIZER(structname) { \
 	.level = { &structname##_state.node[0] }, \
-	.levelcnt = { \
-		NUM_RCU_LVL_0,  /* root of hierarchy. */ \
-		NUM_RCU_LVL_1, \
-		NUM_RCU_LVL_2, \
-		NUM_RCU_LVL_3, \
-		NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
-	}, \
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = -300, \
 	.completed = -300, \
@@ -91,6 +84,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
 
+/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
+static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
+module_param(rcu_fanout_leaf, int, 0);
+int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
+static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
+	NUM_RCU_LVL_0,
+	NUM_RCU_LVL_1,
+	NUM_RCU_LVL_2,
+	NUM_RCU_LVL_3,
+	NUM_RCU_LVL_4,
+};
+int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
+
 /*
  * The rcu_scheduler_active variable transitions from zero to one just
  * before the first task is spawned.  So when this variable is zero, RCU
@@ -2574,9 +2580,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
 	int i;
 
-	for (i = NUM_RCU_LVLS - 1; i > 0; i--)
+	for (i = rcu_num_lvls - 1; i > 0; i--)
 		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-	rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
+	rsp->levelspread[0] = rcu_fanout_leaf;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
@@ -2586,7 +2592,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 	int i;
 
 	cprv = NR_CPUS;
-	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+	for (i = rcu_num_lvls - 1; i >= 0; i--) {
 		ccur = rsp->levelcnt[i];
 		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
 		cprv = ccur;
@@ -2613,13 +2619,15 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
 	/* Initialize the level-tracking arrays. */
 
-	for (i = 1; i < NUM_RCU_LVLS; i++)
+	for (i = 0; i < rcu_num_lvls; i++)
+		rsp->levelcnt[i] = num_rcu_lvl[i];
+	for (i = 1; i < rcu_num_lvls; i++)
 		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
 	rcu_init_levelspread(rsp);
 
 	/* Initialize the elements themselves, starting from the leaves. */
 
-	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+	for (i = rcu_num_lvls - 1; i >= 0; i--) {
 		cpustride *= rsp->levelspread[i];
 		rnp = rsp->level[i];
 		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
@@ -2649,7 +2657,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 	}
 
 	rsp->rda = rda;
-	rnp = rsp->level[NUM_RCU_LVLS - 1];
+	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)
 			rnp++;
@@ -2658,11 +2666,72 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 	}
 }
 
+/*
+ * Compute the rcu_node tree geometry from kernel parameters.  This cannot
+ * replace the definitions in rcutree.h because those are needed to size
+ * the ->node array in the rcu_state structure.
+ */
+static void __init rcu_init_geometry(void)
+{
+	int i;
+	int j;
+	int n = NR_CPUS;
+	int rcu_capacity[MAX_RCU_LVLS + 1];
+
+	/* If the compile-time values are accurate, just leave. */
+	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
+		return;
+
+	/*
+	 * Compute number of nodes that can be handled an rcu_node tree
+	 * with the given number of levels.  Setting rcu_capacity[0] makes
+	 * some of the arithmetic easier.
+	 */
+	rcu_capacity[0] = 1;
+	rcu_capacity[1] = rcu_fanout_leaf;
+	for (i = 2; i <= MAX_RCU_LVLS; i++)
+		rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
+
+	/*
+	 * The boot-time rcu_fanout_leaf parameter is only permitted
+	 * to increase the leaf-level fanout, not decrease it.  Of course,
+	 * the leaf-level fanout cannot exceed the number of bits in
+	 * the rcu_node masks.  Finally, the tree must be able to accommodate
+	 * the configured number of CPUs.  Complain and fall back to the
+	 * compile-time values if these limits are exceeded.
+	 */
+	if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
+	    rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
+	    n > rcu_capacity[MAX_RCU_LVLS]) {
+		WARN_ON(1);
+		return;
+	}
+
+	/* Calculate the number of rcu_nodes at each level of the tree. */
+	for (i = 1; i <= MAX_RCU_LVLS; i++)
+		if (n <= rcu_capacity[i]) {
+			for (j = 0; j <= i; j++)
+				num_rcu_lvl[j] =
+					DIV_ROUND_UP(n, rcu_capacity[i - j]);
+			rcu_num_lvls = i;
+			for (j = i + 1; j <= MAX_RCU_LVLS; j++)
+				num_rcu_lvl[j] = 0;
+			break;
+		}
+
+	/* Calculate the total number of rcu_node structures. */
+	rcu_num_nodes = 0;
+	for (i = 0; i <= MAX_RCU_LVLS; i++)
+		rcu_num_nodes += num_rcu_lvl[i];
+	rcu_num_nodes -= n;
+}
+
 void __init rcu_init(void)
 {
 	int cpu;
 
 	rcu_bootup_announce();
+	rcu_init_geometry();
 	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
 	__rcu_init_preempt();
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 19b61ac1079f..780a0195d35a 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -42,28 +42,28 @@
 #define RCU_FANOUT_4	      (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
 
 #if NR_CPUS <= RCU_FANOUT_1
-#  define NUM_RCU_LVLS	      1
+#  define RCU_NUM_LVLS	      1
 #  define NUM_RCU_LVL_0	      1
 #  define NUM_RCU_LVL_1	      (NR_CPUS)
 #  define NUM_RCU_LVL_2	      0
 #  define NUM_RCU_LVL_3	      0
 #  define NUM_RCU_LVL_4	      0
 #elif NR_CPUS <= RCU_FANOUT_2
-#  define NUM_RCU_LVLS	      2
+#  define RCU_NUM_LVLS	      2
 #  define NUM_RCU_LVL_0	      1
 #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
 #  define NUM_RCU_LVL_2	      (NR_CPUS)
 #  define NUM_RCU_LVL_3	      0
 #  define NUM_RCU_LVL_4	      0
 #elif NR_CPUS <= RCU_FANOUT_3
-#  define NUM_RCU_LVLS	      3
+#  define RCU_NUM_LVLS	      3
 #  define NUM_RCU_LVL_0	      1
 #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
 #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
 #  define NUM_RCU_LVL_3	      (NR_CPUS)
 #  define NUM_RCU_LVL_4	      0
 #elif NR_CPUS <= RCU_FANOUT_4
-#  define NUM_RCU_LVLS	      4
+#  define RCU_NUM_LVLS	      4
 #  define NUM_RCU_LVL_0	      1
 #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
 #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
@@ -76,6 +76,9 @@
 #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
 
+extern int rcu_num_lvls;
+extern int rcu_num_nodes;
+
 /*
  * Dynticks per-CPU state.
  */
@@ -206,7 +209,7 @@ struct rcu_node {
  */
 #define rcu_for_each_node_breadth_first(rsp, rnp) \
 	for ((rnp) = &(rsp)->node[0]; \
-	     (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
 
 /*
  * Do a breadth-first scan of the non-leaf rcu_node structures for the
@@ -215,7 +218,7 @@ struct rcu_node {
  */
 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
 	for ((rnp) = &(rsp)->node[0]; \
-	     (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
+	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
 
 /*
  * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@@ -224,8 +227,8 @@ struct rcu_node {
  * It is still a leaf node, even if it is also the root node.
  */
 #define rcu_for_each_leaf_node(rsp, rnp) \
-	for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
-	     (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
+	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
 
 /* Index values for nxttail array in struct rcu_data. */
 #define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */
@@ -357,9 +360,9 @@ do {									\
  */
 struct rcu_state {
 	struct rcu_node node[NUM_RCU_NODES];	/* Hierarchy. */
-	struct rcu_node *level[NUM_RCU_LVLS];	/* Hierarchy levels. */
+	struct rcu_node *level[RCU_NUM_LVLS];	/* Hierarchy levels. */
 	u32 levelcnt[MAX_RCU_LVLS + 1];		/* # nodes in each level. */
-	u8 levelspread[NUM_RCU_LVLS];		/* kids/node in each level. */
+	u8 levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
 
 	/* The following fields are guarded by the root rcu_node's lock. */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3e4899459f3d..fb92b6dd9980 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -70,6 +70,8 @@ static void __init rcu_bootup_announce_oddness(void)
 #if NUM_RCU_LVL_4 != 0
 	printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
 #endif
+	if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
+		printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
 }
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index d4bc16ddd1d4..a3556a2d842c 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -278,7 +278,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 		   rsp->n_force_qs, rsp->n_force_qs_ngp,
 		   rsp->n_force_qs - rsp->n_force_qs_ngp,
 		   rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
-	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
+	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
 		if (rnp->level != level) {
 			seq_puts(m, "\n");
 			level = rnp->level;
-- 
cgit v1.2.3


From cc5df65b0370fc6aa2bfe3bb19e0451d5cafb99f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 15 Jun 2012 18:16:00 -0700
Subject: rcu: Four-level hierarchy is no longer experimental

Time to make the four-level-hierarchy setting less scary, so this
commit removes "Experimental" from the boot-time message.  Leave the
message in order to get a heads-up on any possible need to expand to
a five-level hierarchy.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree_plugin.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index fb92b6dd9980..70e0fd256cc6 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -68,7 +68,7 @@ static void __init rcu_bootup_announce_oddness(void)
 	printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
 #endif
 #if NUM_RCU_LVL_4 != 0
-	printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
+	printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");
 #endif
 	if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
 		printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
-- 
cgit v1.2.3


From cca6f3931920a7547d02e68adc2ca635bea5600c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 May 2012 21:00:28 -0700
Subject: rcu: Size rcu_node tree from nr_cpu_ids rather than NR_CPUS

The rcu_node tree array is sized based on compile-time constants,
including NR_CPUS.  Although this approach has worked well in the past,
the recent trend by many distros to define NR_CPUS=4096 results in
excessive grace-period-initialization latencies.

This commit therefore substitutes the run-time computed nr_cpu_ids for
the compile-time NR_CPUS when building the tree.  This can result in
much of the compile-time-allocated rcu_node array being unused.  If
this is a major problem, you are in a specialized situation anyway,
so you can manually adjust the NR_CPUS, RCU_FANOUT, and RCU_FANOUT_LEAF
kernel config parameters.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.c        | 2 +-
 kernel/rcutree_plugin.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index a4c592b66e10..0fdbc5e07302 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2675,7 +2675,7 @@ static void __init rcu_init_geometry(void)
 {
 	int i;
 	int j;
-	int n = NR_CPUS;
+	int n = nr_cpu_ids;
 	int rcu_capacity[MAX_RCU_LVLS + 1];
 
 	/* If the compile-time values are accurate, just leave. */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 70e0fd256cc6..ef2b5231afa4 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -72,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
 #endif
 	if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
 		printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
+	if (nr_cpu_ids != NR_CPUS)
+		printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
 }
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
-- 
cgit v1.2.3


From 6c90cc7bf077f28144013e949ee0c122012d194a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Mon, 28 May 2012 19:41:41 -0700
Subject: rcu: Prevent excessive line length in RCU_STATE_INITIALIZER()

Upcoming rcu_barrier() concurrency commits will result in line lengths
greater than 80 characters in the RCU_STATE_INITIALIZER(), so this commit
shortens the name of the macro's argument to prevent this.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0fdbc5e07302..dd7fd96c90c5 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -62,18 +62,18 @@
 
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(structname) { \
-	.level = { &structname##_state.node[0] }, \
+#define RCU_STATE_INITIALIZER(sname) { \
+	.level = { &sname##_state.node[0] }, \
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = -300, \
 	.completed = -300, \
-	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
-	.orphan_nxttail = &structname##_state.orphan_nxtlist, \
-	.orphan_donetail = &structname##_state.orphan_donelist, \
-	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
+	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
+	.orphan_donetail = &sname##_state.orphan_donelist, \
+	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \
 	.n_force_qs = 0, \
 	.n_force_qs_ngp = 0, \
-	.name = #structname, \
+	.name = #sname, \
 }
 
 struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
-- 
cgit v1.2.3


From 037b64ed0bf2405a1a01542164d3418564b44fff Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Mon, 28 May 2012 23:26:01 -0700
Subject: rcu: Place pointer to call_rcu() in rcu_data structure

This is a preparatory commit for increasing rcu_barrier()'s concurrency.
It adds a pointer in the rcu_data structure to the corresponding call_rcu()
function.  This allows a pointer to the rcu_data structure to imply the
function pointer, which allows _rcu_barrier() state to be placed in the
rcu_state structure.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c        | 27 ++++++++++++---------------
 kernel/rcutree.h        |  2 ++
 kernel/rcutree_plugin.h |  5 +++--
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index dd7fd96c90c5..00c518fa34bb 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -62,8 +62,9 @@
 
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(sname) { \
+#define RCU_STATE_INITIALIZER(sname, cr) { \
 	.level = { &sname##_state.node[0] }, \
+	.call = cr, \
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = -300, \
 	.completed = -300, \
@@ -76,10 +77,11 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 	.name = #sname, \
 }
 
-struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
+struct rcu_state rcu_sched_state =
+	RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
@@ -2282,21 +2284,17 @@ static void rcu_barrier_func(void *type)
 {
 	int cpu = smp_processor_id();
 	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
-	void (*call_rcu_func)(struct rcu_head *head,
-			      void (*func)(struct rcu_head *head));
+	struct rcu_state *rsp = type;
 
 	atomic_inc(&rcu_barrier_cpu_count);
-	call_rcu_func = type;
-	call_rcu_func(head, rcu_barrier_callback);
+	rsp->call(head, rcu_barrier_callback);
 }
 
 /*
  * Orchestrate the specified type of RCU barrier, waiting for all
  * RCU callbacks of the specified type to complete.
  */
-static void _rcu_barrier(struct rcu_state *rsp,
-			 void (*call_rcu_func)(struct rcu_head *head,
-					       void (*func)(struct rcu_head *head)))
+static void _rcu_barrier(struct rcu_state *rsp)
 {
 	int cpu;
 	unsigned long flags;
@@ -2348,8 +2346,7 @@ static void _rcu_barrier(struct rcu_state *rsp,
 			while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))
 				schedule_timeout_interruptible(1);
 		} else if (ACCESS_ONCE(rdp->qlen)) {
-			smp_call_function_single(cpu, rcu_barrier_func,
-						 (void *)call_rcu_func, 1);
+			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
 			preempt_enable();
 		} else {
 			preempt_enable();
@@ -2370,7 +2367,7 @@ static void _rcu_barrier(struct rcu_state *rsp,
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 	atomic_inc(&rcu_barrier_cpu_count);
 	smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */
-	call_rcu_func(&rh, rcu_barrier_callback);
+	rsp->call(&rh, rcu_barrier_callback);
 
 	/*
 	 * Now that we have an rcu_barrier_callback() callback on each
@@ -2393,7 +2390,7 @@ static void _rcu_barrier(struct rcu_state *rsp,
  */
 void rcu_barrier_bh(void)
 {
-	_rcu_barrier(&rcu_bh_state, call_rcu_bh);
+	_rcu_barrier(&rcu_bh_state);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
 
@@ -2402,7 +2399,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  */
 void rcu_barrier_sched(void)
 {
-	_rcu_barrier(&rcu_sched_state, call_rcu_sched);
+	_rcu_barrier(&rcu_sched_state);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 780a0195d35a..049896a835d9 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -364,6 +364,8 @@ struct rcu_state {
 	u32 levelcnt[MAX_RCU_LVLS + 1];		/* # nodes in each level. */
 	u8 levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
+	void (*call)(struct rcu_head *head,	/* call_rcu() flavor. */
+		     void (*func)(struct rcu_head *head));
 
 	/* The following fields are guarded by the root rcu_node's lock. */
 
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index ef2b5231afa4..9cb3a68819fa 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -78,7 +78,8 @@ static void __init rcu_bootup_announce_oddness(void)
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
-struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt);
+struct rcu_state rcu_preempt_state =
+	RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
 static struct rcu_state *rcu_state = &rcu_preempt_state;
 
@@ -944,7 +945,7 @@ static int rcu_preempt_cpu_has_callbacks(int cpu)
  */
 void rcu_barrier(void)
 {
-	_rcu_barrier(&rcu_preempt_state, call_rcu);
+	_rcu_barrier(&rcu_preempt_state);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
-- 
cgit v1.2.3


From 06668efa9180f4824fe846a8ff96338c18646bc7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Mon, 28 May 2012 23:57:46 -0700
Subject: rcu: Move _rcu_barrier()'s rcu_head structures to rcu_data structures

In order for multiple flavors of RCU to each concurrently run one
rcu_barrier(), each flavor needs its own per-CPU set of rcu_head
structures.  This commit therefore moves _rcu_barrier()'s set of
per-CPU rcu_head structures from per-CPU variables to the existing
per-CPU and per-RCU-flavor rcu_data structures.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 6 ++----
 kernel/rcutree.h | 3 +++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 00c518fa34bb..1e552598b55d 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -157,7 +157,6 @@ unsigned long rcutorture_vernum;
 
 /* State information for rcu_barrier() and friends. */
 
-static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
 static struct completion rcu_barrier_completion;
@@ -2282,12 +2281,11 @@ static void rcu_barrier_callback(struct rcu_head *notused)
  */
 static void rcu_barrier_func(void *type)
 {
-	int cpu = smp_processor_id();
-	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
 	struct rcu_state *rsp = type;
+	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
 	atomic_inc(&rcu_barrier_cpu_count);
-	rsp->call(head, rcu_barrier_callback);
+	rsp->call(&rdp->barrier_head, rcu_barrier_callback);
 }
 
 /*
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 049896a835d9..586d93c978f2 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -314,6 +314,9 @@ struct rcu_data {
 	unsigned long n_rp_need_fqs;
 	unsigned long n_rp_need_nothing;
 
+	/* 6) _rcu_barrier() callback. */
+	struct rcu_head barrier_head;
+
 	int cpu;
 	struct rcu_state *rsp;
 };
-- 
cgit v1.2.3


From 24ebbca8ecdd5129d7f829a7cb5146aaeb531f77 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Tue, 29 May 2012 00:34:56 -0700
Subject: rcu: Move rcu_barrier_cpu_count to rcu_state structure

In order to allow each RCU flavor to concurrently execute its rcu_barrier()
function, it is necessary to move the relevant state to the rcu_state
structure.  This commit therefore moves the rcu_barrier_cpu_count global
variable to a new ->barrier_cpu_count field in the rcu_state structure.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 25 ++++++++++++++-----------
 kernel/rcutree.h |  1 +
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 1e552598b55d..5929b021666d 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -157,7 +157,6 @@ unsigned long rcutorture_vernum;
 
 /* State information for rcu_barrier() and friends. */
 
-static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
 static struct completion rcu_barrier_completion;
 
@@ -2270,9 +2269,12 @@ static int rcu_cpu_has_callbacks(int cpu)
  * RCU callback function for _rcu_barrier().  If we are last, wake
  * up the task executing _rcu_barrier().
  */
-static void rcu_barrier_callback(struct rcu_head *notused)
+static void rcu_barrier_callback(struct rcu_head *rhp)
 {
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+	struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
+	struct rcu_state *rsp = rdp->rsp;
+
+	if (atomic_dec_and_test(&rsp->barrier_cpu_count))
 		complete(&rcu_barrier_completion);
 }
 
@@ -2284,7 +2286,7 @@ static void rcu_barrier_func(void *type)
 	struct rcu_state *rsp = type;
 	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
-	atomic_inc(&rcu_barrier_cpu_count);
+	atomic_inc(&rsp->barrier_cpu_count);
 	rsp->call(&rdp->barrier_head, rcu_barrier_callback);
 }
 
@@ -2297,9 +2299,9 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	int cpu;
 	unsigned long flags;
 	struct rcu_data *rdp;
-	struct rcu_head rh;
+	struct rcu_data rd;
 
-	init_rcu_head_on_stack(&rh);
+	init_rcu_head_on_stack(&rd.barrier_head);
 
 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
 	mutex_lock(&rcu_barrier_mutex);
@@ -2324,7 +2326,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	 *	us -- but before CPU 1's orphaned callbacks are invoked!!!
 	 */
 	init_completion(&rcu_barrier_completion);
-	atomic_set(&rcu_barrier_cpu_count, 1);
+	atomic_set(&rsp->barrier_cpu_count, 1);
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
 	rsp->rcu_barrier_in_progress = current;
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
@@ -2363,15 +2365,16 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	rcu_adopt_orphan_cbs(rsp);
 	rsp->rcu_barrier_in_progress = NULL;
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
-	atomic_inc(&rcu_barrier_cpu_count);
+	atomic_inc(&rsp->barrier_cpu_count);
 	smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */
-	rsp->call(&rh, rcu_barrier_callback);
+	rd.rsp = rsp;
+	rsp->call(&rd.barrier_head, rcu_barrier_callback);
 
 	/*
 	 * Now that we have an rcu_barrier_callback() callback on each
 	 * CPU, and thus each counted, remove the initial count.
 	 */
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+	if (atomic_dec_and_test(&rsp->barrier_cpu_count))
 		complete(&rcu_barrier_completion);
 
 	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
@@ -2380,7 +2383,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	/* Other rcu_barrier() invocations can now safely proceed. */
 	mutex_unlock(&rcu_barrier_mutex);
 
-	destroy_rcu_head_on_stack(&rh);
+	destroy_rcu_head_on_stack(&rd.barrier_head);
 }
 
 /**
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 586d93c978f2..c57ef0b7f097 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -400,6 +400,7 @@ struct rcu_state {
 	struct task_struct *rcu_barrier_in_progress;
 						/* Task doing rcu_barrier(), */
 						/*  or NULL if no barrier. */
+	atomic_t barrier_cpu_count;		/* # CPUs waiting on. */
 	raw_spinlock_t fqslock;			/* Only one task forcing */
 						/*  quiescent states. */
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
-- 
cgit v1.2.3


From 7db74df88b52844f4e966901e2972bba725e6766 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Tue, 29 May 2012 03:03:37 -0700
Subject: rcu: Move rcu_barrier_completion to rcu_state structure

In order to allow each RCU flavor to concurrently execute its
rcu_barrier() function, it is necessary to move the relevant
state to the rcu_state structure.  This commit therefore moves the
rcu_barrier_completion global variable to a new ->barrier_completion
field in the rcu_state structure.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 9 ++++-----
 kernel/rcutree.h | 1 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5929b021666d..ca7d1678ac79 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -158,7 +158,6 @@ unsigned long rcutorture_vernum;
 /* State information for rcu_barrier() and friends. */
 
 static DEFINE_MUTEX(rcu_barrier_mutex);
-static struct completion rcu_barrier_completion;
 
 /*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
@@ -2275,7 +2274,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
 	struct rcu_state *rsp = rdp->rsp;
 
 	if (atomic_dec_and_test(&rsp->barrier_cpu_count))
-		complete(&rcu_barrier_completion);
+		complete(&rsp->barrier_completion);
 }
 
 /*
@@ -2325,7 +2324,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	 * 6.	Both rcu_barrier_callback() callbacks are invoked, awakening
 	 *	us -- but before CPU 1's orphaned callbacks are invoked!!!
 	 */
-	init_completion(&rcu_barrier_completion);
+	init_completion(&rsp->barrier_completion);
 	atomic_set(&rsp->barrier_cpu_count, 1);
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
 	rsp->rcu_barrier_in_progress = current;
@@ -2375,10 +2374,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	 * CPU, and thus each counted, remove the initial count.
 	 */
 	if (atomic_dec_and_test(&rsp->barrier_cpu_count))
-		complete(&rcu_barrier_completion);
+		complete(&rsp->barrier_completion);
 
 	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
-	wait_for_completion(&rcu_barrier_completion);
+	wait_for_completion(&rsp->barrier_completion);
 
 	/* Other rcu_barrier() invocations can now safely proceed. */
 	mutex_unlock(&rcu_barrier_mutex);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index c57ef0b7f097..d1ca4424122b 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -401,6 +401,7 @@ struct rcu_state {
 						/* Task doing rcu_barrier(), */
 						/*  or NULL if no barrier. */
 	atomic_t barrier_cpu_count;		/* # CPUs waiting on. */
+	struct completion barrier_completion;	/* Wake at barrier end. */
 	raw_spinlock_t fqslock;			/* Only one task forcing */
 						/*  quiescent states. */
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
-- 
cgit v1.2.3


From 7be7f0be907224445acc62b3884c892f38b7ff40 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Tue, 29 May 2012 05:18:53 -0700
Subject: rcu: Move rcu_barrier_mutex to rcu_state structure

In order to allow each RCU flavor to concurrently execute its
rcu_barrier() function, it is necessary to move the relevant
state to the rcu_state structure.  This commit therefore moves the
rcu_barrier_mutex global variable to a new ->barrier_mutex field
in the rcu_state structure.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.c | 9 +++------
 kernel/rcutree.h | 1 +
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ca7d1678ac79..ff992ac24e73 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -71,6 +71,7 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
 	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
 	.orphan_donetail = &sname##_state.orphan_donelist, \
+	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \
 	.n_force_qs = 0, \
 	.n_force_qs_ngp = 0, \
@@ -155,10 +156,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
 unsigned long rcutorture_testseq;
 unsigned long rcutorture_vernum;
 
-/* State information for rcu_barrier() and friends. */
-
-static DEFINE_MUTEX(rcu_barrier_mutex);
-
 /*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
@@ -2303,7 +2300,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	init_rcu_head_on_stack(&rd.barrier_head);
 
 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
-	mutex_lock(&rcu_barrier_mutex);
+	mutex_lock(&rsp->barrier_mutex);
 
 	smp_mb();  /* Prevent any prior operations from leaking in. */
 
@@ -2380,7 +2377,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	wait_for_completion(&rsp->barrier_completion);
 
 	/* Other rcu_barrier() invocations can now safely proceed. */
-	mutex_unlock(&rcu_barrier_mutex);
+	mutex_unlock(&rsp->barrier_mutex);
 
 	destroy_rcu_head_on_stack(&rd.barrier_head);
 }
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index d1ca4424122b..7641aec3e59c 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -400,6 +400,7 @@ struct rcu_state {
 	struct task_struct *rcu_barrier_in_progress;
 						/* Task doing rcu_barrier(), */
 						/*  or NULL if no barrier. */
+	struct mutex barrier_mutex;		/* Guards barrier fields. */
 	atomic_t barrier_cpu_count;		/* # CPUs waiting on. */
 	struct completion barrier_completion;	/* Wake at barrier end. */
 	raw_spinlock_t fqslock;			/* Only one task forcing */
-- 
cgit v1.2.3


From cfed0a85dad921c683e9c0d25b072bcc5745ede0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 15 Jun 2012 18:22:05 -0700
Subject: rcu: Remove needless initialization

For global variables, C defaults all fields to zero.  The initialization
of the rcu_state structure's ->n_force_qs and ->n_force_qs_ngp fields
is therefore redundant, so this commit removes these initializations.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ff992ac24e73..44a8fda9be86 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -73,8 +73,6 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 	.orphan_donetail = &sname##_state.orphan_donelist, \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \
-	.n_force_qs = 0, \
-	.n_force_qs_ngp = 0, \
 	.name = #sname, \
 }
 
-- 
cgit v1.2.3


From cf3a9c4842b1e097dbe0854933c471d43dd24f69 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Tue, 29 May 2012 14:56:46 -0700
Subject: rcu: Increase rcu_barrier() concurrency

The traditional rcu_barrier() implementation has serialized all requests,
regardless of RCU flavor, and also does not coalesce concurrent requests.
In the past, this has been good and sufficient.

However, systems are getting larger and use of rcu_barrier() has been
increasing.  This commit therefore introduces a counter-based scheme
that allows _rcu_barrier() calls for the same flavor of RCU to take
advantage of each others' work.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.c | 36 +++++++++++++++++++++++++++++++++++-
 kernel/rcutree.h |  2 ++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 44a8fda9be86..6bb5d562253f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2294,13 +2294,41 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	unsigned long flags;
 	struct rcu_data *rdp;
 	struct rcu_data rd;
+	unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
+	unsigned long snap_done;
 
 	init_rcu_head_on_stack(&rd.barrier_head);
 
 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
 	mutex_lock(&rsp->barrier_mutex);
 
-	smp_mb();  /* Prevent any prior operations from leaking in. */
+	/*
+	 * Ensure that all prior references, including to ->n_barrier_done,
+	 * are ordered before the _rcu_barrier() machinery.
+	 */
+	smp_mb();  /* See above block comment. */
+
+	/*
+	 * Recheck ->n_barrier_done to see if others did our work for us.
+	 * This means checking ->n_barrier_done for an even-to-odd-to-even
+	 * transition.  The "if" expression below therefore rounds the old
+	 * value up to the next even number and adds two before comparing.
+	 */
+	snap_done = ACCESS_ONCE(rsp->n_barrier_done);
+	if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
+		smp_mb(); /* caller's subsequent code after above check. */
+		mutex_unlock(&rsp->barrier_mutex);
+		return;
+	}
+
+	/*
+	 * Increment ->n_barrier_done to avoid duplicate work.  Use
+	 * ACCESS_ONCE() to prevent the compiler from speculating
+	 * the increment to precede the early-exit check.
+	 */
+	ACCESS_ONCE(rsp->n_barrier_done)++;
+	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
+	smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
 
 	/*
 	 * Initialize the count to one rather than to zero in order to
@@ -2371,6 +2399,12 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	if (atomic_dec_and_test(&rsp->barrier_cpu_count))
 		complete(&rsp->barrier_completion);
 
+	/* Increment ->n_barrier_done to prevent duplicate work. */
+	smp_mb(); /* Keep increment after above mechanism. */
+	ACCESS_ONCE(rsp->n_barrier_done)++;
+	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
+	smp_mb(); /* Keep increment before caller's subsequent code. */
+
 	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
 	wait_for_completion(&rsp->barrier_completion);
 
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7641aec3e59c..be10286ad380 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -403,6 +403,8 @@ struct rcu_state {
 	struct mutex barrier_mutex;		/* Guards barrier fields. */
 	atomic_t barrier_cpu_count;		/* # CPUs waiting on. */
 	struct completion barrier_completion;	/* Wake at barrier end. */
+	unsigned long n_barrier_done;		/* ++ at start and end of */
+						/*  _rcu_barrier(). */
 	raw_spinlock_t fqslock;			/* Only one task forcing */
 						/*  quiescent states. */
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
-- 
cgit v1.2.3


From a83eff0a82a7f3f14fea477fd41e6c082e7fc96a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Wed, 23 May 2012 18:47:05 -0700
Subject: rcu: Add tracing for _rcu_barrier()

This commit adds event tracing for _rcu_barrier() execution.  This
is defined only if RCU_TRACE=y.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/trace/events/rcu.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/rcutree.c           | 29 ++++++++++++++++++++++++++++-
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d274734b2aa4..5bde94d8585b 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -541,6 +541,50 @@ TRACE_EVENT(rcu_torture_read,
 		  __entry->rcutorturename, __entry->rhp)
 );
 
+/*
+ * Tracepoint for _rcu_barrier() execution.  The string "s" describes
+ * the _rcu_barrier phase:
+ *	"Begin": rcu_barrier_callback() started.
+ *	"Check": rcu_barrier_callback() checking for piggybacking.
+ *	"EarlyExit": rcu_barrier_callback() piggybacked, thus early exit.
+ *	"Inc1": rcu_barrier_callback() piggyback check counter incremented.
+ *	"Offline": rcu_barrier_callback() found offline CPU
+ *	"OnlineQ": rcu_barrier_callback() found online CPU with callbacks.
+ *	"OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks.
+ *	"IRQ": An rcu_barrier_callback() callback posted on remote CPU.
+ *	"CB": An rcu_barrier_callback() invoked a callback, not the last.
+ *	"LastCB": An rcu_barrier_callback() invoked the last callback.
+ *	"Inc2": rcu_barrier_callback() piggyback check counter incremented.
+ * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument
+ * is the count of remaining callbacks, and "done" is the piggybacking count.
+ */
+TRACE_EVENT(rcu_barrier,
+
+	TP_PROTO(char *rcuname, char *s, int cpu, int cnt, unsigned long done),
+
+	TP_ARGS(rcuname, s, cpu, cnt, done),
+
+	TP_STRUCT__entry(
+		__field(char *, rcuname)
+		__field(char *, s)
+		__field(int, cpu)
+		__field(int, cnt)
+		__field(unsigned long, done)
+	),
+
+	TP_fast_assign(
+		__entry->rcuname = rcuname;
+		__entry->s = s;
+		__entry->cpu = cpu;
+		__entry->cnt = cnt;
+		__entry->done = done;
+	),
+
+	TP_printk("%s %s cpu %d remaining %d # %lu",
+		  __entry->rcuname, __entry->s, __entry->cpu, __entry->cnt,
+		  __entry->done)
+);
+
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
@@ -564,6 +608,7 @@ TRACE_EVENT(rcu_torture_read,
 #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
 	do { } while (0)
 #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
 
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6bb5d562253f..dda43d826504 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2259,6 +2259,17 @@ static int rcu_cpu_has_callbacks(int cpu)
 	       rcu_preempt_cpu_has_callbacks(cpu);
 }
 
+/*
+ * Helper function for _rcu_barrier() tracing.  If tracing is disabled,
+ * the compiler is expected to optimize this away.
+ */
+static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
+			       int cpu, unsigned long done)
+{
+	trace_rcu_barrier(rsp->name, s, cpu,
+			  atomic_read(&rsp->barrier_cpu_count), done);
+}
+
 /*
  * RCU callback function for _rcu_barrier().  If we are last, wake
  * up the task executing _rcu_barrier().
@@ -2268,8 +2279,12 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
 	struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
 	struct rcu_state *rsp = rdp->rsp;
 
-	if (atomic_dec_and_test(&rsp->barrier_cpu_count))
+	if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
+		_rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
 		complete(&rsp->barrier_completion);
+	} else {
+		_rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
+	}
 }
 
 /*
@@ -2280,6 +2295,7 @@ static void rcu_barrier_func(void *type)
 	struct rcu_state *rsp = type;
 	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
+	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
 	atomic_inc(&rsp->barrier_cpu_count);
 	rsp->call(&rdp->barrier_head, rcu_barrier_callback);
 }
@@ -2298,6 +2314,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	unsigned long snap_done;
 
 	init_rcu_head_on_stack(&rd.barrier_head);
+	_rcu_barrier_trace(rsp, "Begin", -1, snap);
 
 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
 	mutex_lock(&rsp->barrier_mutex);
@@ -2315,7 +2332,9 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	 * value up to the next even number and adds two before comparing.
 	 */
 	snap_done = ACCESS_ONCE(rsp->n_barrier_done);
+	_rcu_barrier_trace(rsp, "Check", -1, snap_done);
 	if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
+		_rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
 		smp_mb(); /* caller's subsequent code after above check. */
 		mutex_unlock(&rsp->barrier_mutex);
 		return;
@@ -2328,6 +2347,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	 */
 	ACCESS_ONCE(rsp->n_barrier_done)++;
 	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
+	_rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
 	smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
 
 	/*
@@ -2364,13 +2384,19 @@ static void _rcu_barrier(struct rcu_state *rsp)
 		preempt_disable();
 		rdp = per_cpu_ptr(rsp->rda, cpu);
 		if (cpu_is_offline(cpu)) {
+			_rcu_barrier_trace(rsp, "Offline", cpu,
+					   rsp->n_barrier_done);
 			preempt_enable();
 			while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))
 				schedule_timeout_interruptible(1);
 		} else if (ACCESS_ONCE(rdp->qlen)) {
+			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
+					   rsp->n_barrier_done);
 			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
 			preempt_enable();
 		} else {
+			_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
+					   rsp->n_barrier_done);
 			preempt_enable();
 		}
 	}
@@ -2403,6 +2429,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	smp_mb(); /* Keep increment after above mechanism. */
 	ACCESS_ONCE(rsp->n_barrier_done)++;
 	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
+	_rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
 	smp_mb(); /* Keep increment before caller's subsequent code. */
 
 	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
-- 
cgit v1.2.3


From d7e187c8e9f30543f9cadfed094896ff414acb8f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Fri, 25 May 2012 11:49:15 -0700
Subject: rcu: Add rcu_barrier() statistics to debugfs tracing

This commit adds an rcubarrier file to RCU's debugfs statistical tracing
directory, providing diagnostic information on rcu_barrier().

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree_trace.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index a3556a2d842c..057408be6c3b 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -46,6 +46,40 @@
 #define RCU_TREE_NONCORE
 #include "rcutree.h"
 
+static void print_rcubarrier(struct seq_file *m, struct rcu_state *rsp)
+{
+	seq_printf(m, "%c bcc: %d nbd: %lu\n",
+		   rsp->rcu_barrier_in_progress ? 'B' : '.',
+		   atomic_read(&rsp->barrier_cpu_count),
+		   rsp->n_barrier_done);
+}
+
+static int show_rcubarrier(struct seq_file *m, void *unused)
+{
+#ifdef CONFIG_TREE_PREEMPT_RCU
+	seq_puts(m, "rcu_preempt: ");
+	print_rcubarrier(m, &rcu_preempt_state);
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+	seq_puts(m, "rcu_sched: ");
+	print_rcubarrier(m, &rcu_sched_state);
+	seq_puts(m, "rcu_bh: ");
+	print_rcubarrier(m, &rcu_bh_state);
+	return 0;
+}
+
+static int rcubarrier_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcubarrier, NULL);
+}
+
+static const struct file_operations rcubarrier_fops = {
+	.owner = THIS_MODULE,
+	.open = rcubarrier_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
 #ifdef CONFIG_RCU_BOOST
 
 static char convert_kthread_status(unsigned int kthread_status)
@@ -453,6 +487,11 @@ static int __init rcutree_trace_init(void)
 	if (!rcudir)
 		goto free_out;
 
+	retval = debugfs_create_file("rcubarrier", 0444, rcudir,
+						NULL, &rcubarrier_fops);
+	if (!retval)
+		goto free_out;
+
 	retval = debugfs_create_file("rcudata", 0444, rcudir,
 						NULL, &rcudata_fops);
 	if (!retval)
-- 
cgit v1.2.3


From 1bca8cf1a2c3c6683b12ad28a3e826ca7a834978 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 12 Jun 2012 09:40:38 -0700
Subject: rcu: Remove unneeded __rcu_process_callbacks() argument

With the advent of __this_cpu_ptr(), it is no longer necessary to pass
both the rcu_state and rcu_data structures into __rcu_process_callbacks().
This commit therefore computes the rcu_data pointer from the rcu_state
pointer within __rcu_process_callbacks() so that callers can pass in
only the pointer to the rcu_state structure.  This paves the way for
linking the rcu_state structures together and iterating over them.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c        | 8 ++++----
 kernel/rcutree_plugin.h | 3 +--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index dda43d826504..5376a156be8a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1788,9 +1788,10 @@ unlock_fqs_ret:
  * whom the rdp belongs.
  */
 static void
-__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
+__rcu_process_callbacks(struct rcu_state *rsp)
 {
 	unsigned long flags;
+	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
 	WARN_ON_ONCE(rdp->beenonline == 0);
 
@@ -1827,9 +1828,8 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
 static void rcu_process_callbacks(struct softirq_action *unused)
 {
 	trace_rcu_utilization("Start RCU core");
-	__rcu_process_callbacks(&rcu_sched_state,
-				&__get_cpu_var(rcu_sched_data));
-	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
+	__rcu_process_callbacks(&rcu_sched_state);
+	__rcu_process_callbacks(&rcu_bh_state);
 	rcu_preempt_process_callbacks();
 	trace_rcu_utilization("End RCU core");
 }
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 9cb3a68819fa..5a80cdd9a0a3 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -687,8 +687,7 @@ static void rcu_preempt_check_callbacks(int cpu)
  */
 static void rcu_preempt_process_callbacks(void)
 {
-	__rcu_process_callbacks(&rcu_preempt_state,
-				&__get_cpu_var(rcu_preempt_data));
+	__rcu_process_callbacks(&rcu_preempt_state);
 }
 
 #ifdef CONFIG_RCU_BOOST
-- 
cgit v1.2.3


From 6ce75a2326e6f8b3bdfb60e1de7934b89858e87b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 12 Jun 2012 11:01:13 -0700
Subject: rcu: Introduce for_each_rcu_flavor() and use it

The arrival of TREE_PREEMPT_RCU some years back included some ugly
code involving either #ifdef or #ifdef'ed wrapper functions to iterate
over all non-SRCU flavors of RCU.  This commit therefore introduces
a for_each_rcu_flavor() iterator over the rcu_state structures for each
flavor of RCU to clean up a bit of the ugliness.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.c        |  53 +++++++++++++---------
 kernel/rcutree.h        |  12 +++--
 kernel/rcutree_plugin.h | 116 ------------------------------------------------
 3 files changed, 37 insertions(+), 144 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5376a156be8a..b61c3ffc80e9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -84,6 +84,7 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
+LIST_HEAD(rcu_struct_flavors);
 
 /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
 static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
@@ -860,9 +861,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
  */
 void rcu_cpu_stall_reset(void)
 {
-	rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-	rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-	rcu_preempt_stall_reset();
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
 }
 
 static struct notifier_block rcu_panic_block = {
@@ -1827,10 +1829,11 @@ __rcu_process_callbacks(struct rcu_state *rsp)
  */
 static void rcu_process_callbacks(struct softirq_action *unused)
 {
+	struct rcu_state *rsp;
+
 	trace_rcu_utilization("Start RCU core");
-	__rcu_process_callbacks(&rcu_sched_state);
-	__rcu_process_callbacks(&rcu_bh_state);
-	rcu_preempt_process_callbacks();
+	for_each_rcu_flavor(rsp)
+		__rcu_process_callbacks(rsp);
 	trace_rcu_utilization("End RCU core");
 }
 
@@ -2241,9 +2244,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static int rcu_pending(int cpu)
 {
-	return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
-	       __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
-	       rcu_preempt_pending(cpu);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
+			return 1;
+	return 0;
 }
 
 /*
@@ -2253,10 +2259,13 @@ static int rcu_pending(int cpu)
  */
 static int rcu_cpu_has_callbacks(int cpu)
 {
+	struct rcu_state *rsp;
+
 	/* RCU callbacks either ready or pending? */
-	return per_cpu(rcu_sched_data, cpu).nxtlist ||
-	       per_cpu(rcu_bh_data, cpu).nxtlist ||
-	       rcu_preempt_cpu_has_callbacks(cpu);
+	for_each_rcu_flavor(rsp)
+		if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
+			return 1;
+	return 0;
 }
 
 /*
@@ -2551,9 +2560,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 
 static void __cpuinit rcu_prepare_cpu(int cpu)
 {
-	rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
-	rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
-	rcu_preempt_init_percpu_data(cpu);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		rcu_init_percpu_data(cpu, rsp,
+				     strcmp(rsp->name, "rcu_preempt") == 0);
 }
 
 /*
@@ -2565,6 +2576,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	long cpu = (long)hcpu;
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
+	struct rcu_state *rsp;
 
 	trace_rcu_utilization("Start CPU hotplug");
 	switch (action) {
@@ -2589,18 +2601,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		 * touch any data without introducing corruption. We send the
 		 * dying CPU's callbacks to an arbitrarily chosen online CPU.
 		 */
-		rcu_cleanup_dying_cpu(&rcu_bh_state);
-		rcu_cleanup_dying_cpu(&rcu_sched_state);
-		rcu_preempt_cleanup_dying_cpu();
+		for_each_rcu_flavor(rsp)
+			rcu_cleanup_dying_cpu(rsp);
 		rcu_cleanup_after_idle(cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
-		rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
-		rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
-		rcu_preempt_cleanup_dead_cpu(cpu);
+		for_each_rcu_flavor(rsp)
+			rcu_cleanup_dead_cpu(cpu, rsp);
 		break;
 	default:
 		break;
@@ -2717,6 +2727,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 		per_cpu_ptr(rsp->rda, i)->mynode = rnp;
 		rcu_boot_init_percpu_data(i, rsp);
 	}
+	list_add(&rsp->flavors, &rcu_struct_flavors);
 }
 
 /*
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index be10286ad380..b92c4550a6e6 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -422,8 +422,13 @@ struct rcu_state {
 	unsigned long gp_max;			/* Maximum GP duration in */
 						/*  jiffies. */
 	char *name;				/* Name of structure. */
+	struct list_head flavors;		/* List of RCU flavors. */
 };
 
+extern struct list_head rcu_struct_flavors;
+#define for_each_rcu_flavor(rsp) \
+	list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
+
 /* Return values for rcu_preempt_offline_tasks(). */
 
 #define RCU_OFL_TASKS_NORM_GP	0x1		/* Tasks blocking normal */
@@ -466,25 +471,18 @@ static void rcu_stop_cpu_kthread(int cpu);
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 static void rcu_print_detail_task_stall(struct rcu_state *rsp);
 static int rcu_print_task_stall(struct rcu_node *rnp);
-static void rcu_preempt_stall_reset(void);
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 				     struct rcu_node *rnp,
 				     struct rcu_data *rdp);
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
-static void rcu_preempt_cleanup_dead_cpu(int cpu);
 static void rcu_preempt_check_callbacks(int cpu);
-static void rcu_preempt_process_callbacks(void);
 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 			       bool wake);
 #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
-static int rcu_preempt_pending(int cpu);
-static int rcu_preempt_cpu_has_callbacks(int cpu);
-static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
-static void rcu_preempt_cleanup_dying_cpu(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 5a80cdd9a0a3..d18b4d383afe 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -544,16 +544,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
 	return ndetected;
 }
 
-/*
- * Suppress preemptible RCU's CPU stall warnings by pushing the
- * time of the next stall-warning message comfortably far into the
- * future.
- */
-static void rcu_preempt_stall_reset(void)
-{
-	rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-}
-
 /*
  * Check that the list of blocked tasks for the newly completed grace
  * period is in fact empty.  It is a serious bug to complete a grace
@@ -654,14 +644,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
-/*
- * Do CPU-offline processing for preemptible RCU.
- */
-static void rcu_preempt_cleanup_dead_cpu(int cpu)
-{
-	rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state);
-}
-
 /*
  * Check for a quiescent state from the current CPU.  When a task blocks,
  * the task is recorded in the corresponding CPU's rcu_node structure,
@@ -682,14 +664,6 @@ static void rcu_preempt_check_callbacks(int cpu)
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 }
 
-/*
- * Process callbacks for preemptible RCU.
- */
-static void rcu_preempt_process_callbacks(void)
-{
-	__rcu_process_callbacks(&rcu_preempt_state);
-}
-
 #ifdef CONFIG_RCU_BOOST
 
 static void rcu_preempt_do_callbacks(void)
@@ -921,24 +895,6 @@ mb_ret:
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
-/*
- * Check to see if there is any immediate preemptible-RCU-related work
- * to be done.
- */
-static int rcu_preempt_pending(int cpu)
-{
-	return __rcu_pending(&rcu_preempt_state,
-			     &per_cpu(rcu_preempt_data, cpu));
-}
-
-/*
- * Does preemptible RCU have callbacks on this CPU?
- */
-static int rcu_preempt_cpu_has_callbacks(int cpu)
-{
-	return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
-}
-
 /**
  * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
  */
@@ -948,23 +904,6 @@ void rcu_barrier(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
-/*
- * Initialize preemptible RCU's per-CPU data.
- */
-static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
-{
-	rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
-}
-
-/*
- * Move preemptible RCU's callbacks from dying CPU to other online CPU
- * and record a quiescent state.
- */
-static void rcu_preempt_cleanup_dying_cpu(void)
-{
-	rcu_cleanup_dying_cpu(&rcu_preempt_state);
-}
-
 /*
  * Initialize preemptible RCU's state structures.
  */
@@ -1049,14 +988,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
 	return 0;
 }
 
-/*
- * Because preemptible RCU does not exist, there is no need to suppress
- * its CPU stall warnings.
- */
-static void rcu_preempt_stall_reset(void)
-{
-}
-
 /*
  * Because there is no preemptible RCU, there can be no readers blocked,
  * so there is no need to check for blocked tasks.  So check only for
@@ -1084,14 +1015,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
-/*
- * Because preemptible RCU does not exist, it never needs CPU-offline
- * processing.
- */
-static void rcu_preempt_cleanup_dead_cpu(int cpu)
-{
-}
-
 /*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
@@ -1100,14 +1023,6 @@ static void rcu_preempt_check_callbacks(int cpu)
 {
 }
 
-/*
- * Because preemptible RCU does not exist, it never has any callbacks
- * to process.
- */
-static void rcu_preempt_process_callbacks(void)
-{
-}
-
 /*
  * Queue an RCU callback for lazy invocation after a grace period.
  * This will likely be later named something like "call_rcu_lazy()",
@@ -1148,22 +1063,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
-/*
- * Because preemptible RCU does not exist, it never has any work to do.
- */
-static int rcu_preempt_pending(int cpu)
-{
-	return 0;
-}
-
-/*
- * Because preemptible RCU does not exist, it never has callbacks
- */
-static int rcu_preempt_cpu_has_callbacks(int cpu)
-{
-	return 0;
-}
-
 /*
  * Because preemptible RCU does not exist, rcu_barrier() is just
  * another name for rcu_barrier_sched().
@@ -1174,21 +1073,6 @@ void rcu_barrier(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
-/*
- * Because preemptible RCU does not exist, there is no per-CPU
- * data to initialize.
- */
-static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
-{
-}
-
-/*
- * Because there is no preemptible RCU, there is no cleanup to do.
- */
-static void rcu_preempt_cleanup_dying_cpu(void)
-{
-}
-
 /*
  * Because preemptible RCU does not exist, it need not be initialized.
  */
-- 
cgit v1.2.3


From c0cc962da3e7770feb3665f087ea3e23d8c15479 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 12 Jun 2012 12:25:39 -0700
Subject: rcu: Use for_each_rcu_flavor() in TREE_RCU tracing

This commit applies the new for_each_rcu_flavor() macro to the
kernel/rcutree_trace.c file.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree_trace.c | 116 ++++++++++++++++++-------------------------------
 1 file changed, 43 insertions(+), 73 deletions(-)

diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 057408be6c3b..a16ddbd6fdc4 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -46,24 +46,15 @@
 #define RCU_TREE_NONCORE
 #include "rcutree.h"
 
-static void print_rcubarrier(struct seq_file *m, struct rcu_state *rsp)
-{
-	seq_printf(m, "%c bcc: %d nbd: %lu\n",
-		   rsp->rcu_barrier_in_progress ? 'B' : '.',
-		   atomic_read(&rsp->barrier_cpu_count),
-		   rsp->n_barrier_done);
-}
-
 static int show_rcubarrier(struct seq_file *m, void *unused)
 {
-#ifdef CONFIG_TREE_PREEMPT_RCU
-	seq_puts(m, "rcu_preempt: ");
-	print_rcubarrier(m, &rcu_preempt_state);
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-	seq_puts(m, "rcu_sched: ");
-	print_rcubarrier(m, &rcu_sched_state);
-	seq_puts(m, "rcu_bh: ");
-	print_rcubarrier(m, &rcu_bh_state);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		seq_printf(m, "%s: %c bcc: %d nbd: %lu\n",
+			   rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.',
+			   atomic_read(&rsp->barrier_cpu_count),
+			   rsp->n_barrier_done);
 	return 0;
 }
 
@@ -129,24 +120,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
 }
 
-#define PRINT_RCU_DATA(name, func, m) \
-	do { \
-		int _p_r_d_i; \
-		\
-		for_each_possible_cpu(_p_r_d_i) \
-			func(m, &per_cpu(name, _p_r_d_i)); \
-	} while (0)
-
 static int show_rcudata(struct seq_file *m, void *unused)
 {
-#ifdef CONFIG_TREE_PREEMPT_RCU
-	seq_puts(m, "rcu_preempt:\n");
-	PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m);
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-	seq_puts(m, "rcu_sched:\n");
-	PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m);
-	seq_puts(m, "rcu_bh:\n");
-	PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m);
+	int cpu;
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp) {
+		seq_printf(m, "%s:\n", rsp->name);
+		for_each_possible_cpu(cpu)
+			print_one_rcu_data(m, per_cpu_ptr(rsp->rda, cpu));
+	}
 	return 0;
 }
 
@@ -200,6 +183,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
 
 static int show_rcudata_csv(struct seq_file *m, void *unused)
 {
+	int cpu;
+	struct rcu_state *rsp;
+
 	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
 	seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
 	seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\"");
@@ -207,14 +193,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
 	seq_puts(m, "\"kt\",\"ktl\"");
 #endif /* #ifdef CONFIG_RCU_BOOST */
 	seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n");
-#ifdef CONFIG_TREE_PREEMPT_RCU
-	seq_puts(m, "\"rcu_preempt:\"\n");
-	PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m);
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-	seq_puts(m, "\"rcu_sched:\"\n");
-	PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m);
-	seq_puts(m, "\"rcu_bh:\"\n");
-	PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m);
+	for_each_rcu_flavor(rsp) {
+		seq_printf(m, "\"%s:\"\n", rsp->name);
+		for_each_possible_cpu(cpu)
+			print_one_rcu_data_csv(m, per_cpu_ptr(rsp->rda, cpu));
+	}
 	return 0;
 }
 
@@ -304,9 +287,9 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 	struct rcu_node *rnp;
 
 	gpnum = rsp->gpnum;
-	seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
+	seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x "
 		      "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
-		   rsp->completed, gpnum, rsp->fqs_state,
+		   rsp->name, rsp->completed, gpnum, rsp->fqs_state,
 		   (long)(rsp->jiffies_force_qs - jiffies),
 		   (int)(jiffies & 0xffff),
 		   rsp->n_force_qs, rsp->n_force_qs_ngp,
@@ -329,14 +312,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 
 static int show_rcuhier(struct seq_file *m, void *unused)
 {
-#ifdef CONFIG_TREE_PREEMPT_RCU
-	seq_puts(m, "rcu_preempt:\n");
-	print_one_rcu_state(m, &rcu_preempt_state);
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-	seq_puts(m, "rcu_sched:\n");
-	print_one_rcu_state(m, &rcu_sched_state);
-	seq_puts(m, "rcu_bh:\n");
-	print_one_rcu_state(m, &rcu_bh_state);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		print_one_rcu_state(m, rsp);
 	return 0;
 }
 
@@ -377,11 +356,10 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
 
 static int show_rcugp(struct seq_file *m, void *unused)
 {
-#ifdef CONFIG_TREE_PREEMPT_RCU
-	show_one_rcugp(m, &rcu_preempt_state);
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-	show_one_rcugp(m, &rcu_sched_state);
-	show_one_rcugp(m, &rcu_bh_state);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		show_one_rcugp(m, rsp);
 	return 0;
 }
 
@@ -416,28 +394,20 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->n_rp_need_nothing);
 }
 
-static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
+static int show_rcu_pending(struct seq_file *m, void *unused)
 {
 	int cpu;
 	struct rcu_data *rdp;
-
-	for_each_possible_cpu(cpu) {
-		rdp = per_cpu_ptr(rsp->rda, cpu);
-		if (rdp->beenonline)
-			print_one_rcu_pending(m, rdp);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp) {
+		seq_printf(m, "%s:\n", rsp->name);
+		for_each_possible_cpu(cpu) {
+			rdp = per_cpu_ptr(rsp->rda, cpu);
+			if (rdp->beenonline)
+				print_one_rcu_pending(m, rdp);
+		}
 	}
-}
-
-static int show_rcu_pending(struct seq_file *m, void *unused)
-{
-#ifdef CONFIG_TREE_PREEMPT_RCU
-	seq_puts(m, "rcu_preempt:\n");
-	print_rcu_pendings(m, &rcu_preempt_state);
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-	seq_puts(m, "rcu_sched:\n");
-	print_rcu_pendings(m, &rcu_sched_state);
-	seq_puts(m, "rcu_bh:\n");
-	print_rcu_pendings(m, &rcu_bh_state);
 	return 0;
 }
 
-- 
cgit v1.2.3


From ff015030c939f0bec68fa9b8898da3aaa7fe55ea Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 12 Jun 2012 13:16:58 -0700
Subject: rcu: RCU_SAVE_DYNTICK code no longer ever dead

Before RCU had unified idle, the RCU_SAVE_DYNTICK leg of the switch
statement in force_quiescent_state() was dead code for CONFIG_NO_HZ=n
kernel builds.  With unified idle, the code is never dead.  This commit
therefore removes the "if" statement designed to make gcc aware of when
the code was and was not dead.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index b61c3ffc80e9..967b4bed2cf3 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1747,8 +1747,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
 		break; /* grace period idle or initializing, ignore. */
 
 	case RCU_SAVE_DYNTICK:
-		if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
-			break; /* So gcc recognizes the dead code. */
 
 		raw_spin_unlock(&rnp->lock);  /* irqs remain disabled */
 
-- 
cgit v1.2.3


From 74d874e7bdc5b09cedcc7da0de44db25888eea10 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 7 May 2012 13:43:30 -0700
Subject: rcu: Update documentation to cover call_srcu() and srcu_barrier().

The advent of call_srcu() and srcu_barrier() obsoleted some of the
documentation, so this commit brings that up to date.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/RCU/checklist.txt  | 39 ++++++++++++++++++++-------------------
 Documentation/RCU/rcubarrier.txt | 15 ++++-----------
 Documentation/RCU/torture.txt    |  9 +++++++++
 Documentation/RCU/whatisRCU.txt  |  6 +++---
 4 files changed, 36 insertions(+), 33 deletions(-)

diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 5c8d74968090..fc103d7a0474 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -162,9 +162,9 @@ over a rather long period of time, but improvements are always welcome!
 		when publicizing a pointer to a structure that can
 		be traversed by an RCU read-side critical section.
 
-5.	If call_rcu(), or a related primitive such as call_rcu_bh() or
-	call_rcu_sched(), is used, the callback function must be
-	written to be called from softirq context.  In particular,
+5.	If call_rcu(), or a related primitive such as call_rcu_bh(),
+	call_rcu_sched(), or call_srcu() is used, the callback function
+	must be written to be called from softirq context.  In particular,
 	it cannot block.
 
 6.	Since synchronize_rcu() can block, it cannot be called from
@@ -202,11 +202,12 @@ over a rather long period of time, but improvements are always welcome!
 	updater uses call_rcu_sched() or synchronize_sched(), then
 	the corresponding readers must disable preemption, possibly
 	by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
-	If the updater uses synchronize_srcu(), the the corresponding
-	readers must use srcu_read_lock() and srcu_read_unlock(),
-	and with the same srcu_struct.	The rules for the expedited
-	primitives are the same as for their non-expedited counterparts.
-	Mixing things up will result in confusion and broken kernels.
+	If the updater uses synchronize_srcu() or call_srcu(),
+	the the corresponding readers must use srcu_read_lock() and
+	srcu_read_unlock(), and with the same srcu_struct.  The rules for
+	the expedited primitives are the same as for their non-expedited
+	counterparts.  Mixing things up will result in confusion and
+	broken kernels.
 
 	One exception to this rule: rcu_read_lock() and rcu_read_unlock()
 	may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
@@ -333,14 +334,14 @@ over a rather long period of time, but improvements are always welcome!
 	victim CPU from ever going offline.)
 
 14.	SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(),
-	synchronize_srcu(), and synchronize_srcu_expedited()) may only
-	be invoked from process context.  Unlike other forms of RCU, it
-	-is- permissible to block in an SRCU read-side critical section
-	(demarked by srcu_read_lock() and srcu_read_unlock()), hence the
-	"SRCU": "sleepable RCU".  Please note that if you don't need
-	to sleep in read-side critical sections, you should be using
-	RCU rather than SRCU, because RCU is almost always faster and
-	easier to use than is SRCU.
+	synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu())
+	may only be invoked from process context.  Unlike other forms of
+	RCU, it -is- permissible to block in an SRCU read-side critical
+	section (demarked by srcu_read_lock() and srcu_read_unlock()),
+	hence the "SRCU": "sleepable RCU".  Please note that if you
+	don't need to sleep in read-side critical sections, you should be
+	using RCU rather than SRCU, because RCU is almost always faster
+	and easier to use than is SRCU.
 
 	If you need to enter your read-side critical section in a
 	hardirq or exception handler, and then exit that same read-side
@@ -353,8 +354,8 @@ over a rather long period of time, but improvements are always welcome!
 	cleanup_srcu_struct().	These are passed a "struct srcu_struct"
 	that defines the scope of a given SRCU domain.	Once initialized,
 	the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock()
-	synchronize_srcu(), and synchronize_srcu_expedited().  A given
-	synchronize_srcu() waits only for SRCU read-side critical
+	synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu().
+	A given synchronize_srcu() waits only for SRCU read-side critical
 	sections governed by srcu_read_lock() and srcu_read_unlock()
 	calls that have been passed the same srcu_struct.  This property
 	is what makes sleeping read-side critical sections tolerable --
@@ -374,7 +375,7 @@ over a rather long period of time, but improvements are always welcome!
 	requiring SRCU's read-side deadlock immunity or low read-side
 	realtime latency.
 
-	Note that, rcu_assign_pointer() relates to SRCU just as they do
+	Note that, rcu_assign_pointer() relates to SRCU just as it does
 	to other forms of RCU.
 
 15.	The whole point of call_rcu(), synchronize_rcu(), and friends
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
index e439a0edee22..38428c125135 100644
--- a/Documentation/RCU/rcubarrier.txt
+++ b/Documentation/RCU/rcubarrier.txt
@@ -79,8 +79,6 @@ complete. Pseudo-code using rcu_barrier() is as follows:
    2. Execute rcu_barrier().
    3. Allow the module to be unloaded.
 
-Quick Quiz #1: Why is there no srcu_barrier()?
-
 The rcutorture module makes use of rcu_barrier in its exit function
 as follows:
 
@@ -162,7 +160,7 @@ for any pre-existing callbacks to complete.
 Then lines 55-62 print status and do operation-specific cleanup, and
 then return, permitting the module-unload operation to be completed.
 
-Quick Quiz #2: Is there any other situation where rcu_barrier() might
+Quick Quiz #1: Is there any other situation where rcu_barrier() might
 	be required?
 
 Your module might have additional complications. For example, if your
@@ -242,7 +240,7 @@ reaches zero, as follows:
  4 complete(&rcu_barrier_completion);
  5 }
 
-Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes
+Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
 	immediately (thus incrementing rcu_barrier_cpu_count to the
 	value one), but the other CPU's rcu_barrier_func() invocations
 	are delayed for a full grace period? Couldn't this result in
@@ -259,12 +257,7 @@ so that your module may be safely unloaded.
 
 Answers to Quick Quizzes
 
-Quick Quiz #1: Why is there no srcu_barrier()?
-
-Answer: Since there is no call_srcu(), there can be no outstanding SRCU
-	callbacks. Therefore, there is no need to wait for them.
-
-Quick Quiz #2: Is there any other situation where rcu_barrier() might
+Quick Quiz #1: Is there any other situation where rcu_barrier() might
 	be required?
 
 Answer: Interestingly enough, rcu_barrier() was not originally
@@ -278,7 +271,7 @@ Answer: Interestingly enough, rcu_barrier() was not originally
 	implementing rcutorture, and found that rcu_barrier() solves
 	this problem as well.
 
-Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes
+Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
 	immediately (thus incrementing rcu_barrier_cpu_count to the
 	value one), but the other CPU's rcu_barrier_func() invocations
 	are delayed for a full grace period? Couldn't this result in
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 4ddf3913fd8c..7dce8a17eac2 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -174,11 +174,20 @@ torture_type	The type of RCU to test, with string values as follows:
 			and synchronize_rcu_bh_expedited().
 
 		"srcu": srcu_read_lock(), srcu_read_unlock() and
+			call_srcu().
+
+		"srcu_sync": srcu_read_lock(), srcu_read_unlock() and
 			synchronize_srcu().
 
 		"srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
 			synchronize_srcu_expedited().
 
+		"srcu_raw": srcu_read_lock_raw(), srcu_read_unlock_raw(),
+			and call_srcu().
+
+		"srcu_raw_sync": srcu_read_lock_raw(), srcu_read_unlock_raw(),
+			and synchronize_srcu().
+
 		"sched": preempt_disable(), preempt_enable(), and
 			call_rcu_sched().
 
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 6bbe8dcdc3da..69ee188515e7 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -833,9 +833,9 @@ sched:	Critical sections	Grace period		Barrier
 
 SRCU:	Critical sections	Grace period		Barrier
 
-	srcu_read_lock		synchronize_srcu	N/A
-	srcu_read_unlock	synchronize_srcu_expedited
-	srcu_read_lock_raw
+	srcu_read_lock		synchronize_srcu	srcu_barrier
+	srcu_read_unlock	call_srcu
+	srcu_read_lock_raw	synchronize_srcu_expedited
 	srcu_read_unlock_raw
 	srcu_dereference
 
-- 
cgit v1.2.3


From 751a68b2e46fe11a42450cb55a16e8065eddec7e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 7 May 2012 13:44:44 -0700
Subject: rcu: Rationalize ordering of torture_ops list

Move the raw SRCU interfaces out of the middle of the normal SRCU
interfaces.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutorture.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index e66b34ab7555..9850479f319d 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -1908,8 +1908,8 @@ rcu_torture_init(void)
 	static struct rcu_torture_ops *torture_ops[] =
 		{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
 		  &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
-		  &srcu_ops, &srcu_sync_ops, &srcu_raw_ops,
-		  &srcu_raw_sync_ops, &srcu_expedited_ops,
+		  &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops,
+		  &srcu_raw_ops, &srcu_raw_sync_ops,
 		  &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
 
 	mutex_lock(&fullstop_mutex);
-- 
cgit v1.2.3


From e3f8d3788ed7cf55946030dc9b76e73edb111602 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 May 2012 10:21:50 -0700
Subject: rcu: Test srcu_barrier() from rcutorture test suite

SRCU now has a call_srcu() and an srcu_barrier(), but rcutorture does not
test them.  This commit adds the machinery to allow rcutorture's existing
tests for call_rcu() and rcu_barrier() to apply to the SRCU equivalents.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutorture.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9850479f319d..7b6935e0cee3 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -635,6 +635,17 @@ static void srcu_torture_synchronize(void)
 	synchronize_srcu(&srcu_ctl);
 }
 
+static void srcu_torture_call(struct rcu_head *head,
+			      void (*func)(struct rcu_head *head))
+{
+	call_srcu(&srcu_ctl, head, func);
+}
+
+static void srcu_torture_barrier(void)
+{
+	srcu_barrier(&srcu_ctl);
+}
+
 static int srcu_torture_stats(char *page)
 {
 	int cnt = 0;
@@ -661,8 +672,8 @@ static struct rcu_torture_ops srcu_ops = {
 	.completed	= srcu_torture_completed,
 	.deferred_free	= srcu_torture_deferred_free,
 	.sync		= srcu_torture_synchronize,
-	.call		= NULL,
-	.cb_barrier	= NULL,
+	.call		= srcu_torture_call,
+	.cb_barrier	= srcu_torture_barrier,
 	.stats		= srcu_torture_stats,
 	.name		= "srcu"
 };
-- 
cgit v1.2.3


From c6ebcbb60c8c68a88160fe54302e851700d1362c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Mon, 28 May 2012 19:21:41 -0700
Subject: rcu: Fix bug in rcu_barrier() torture test

The child threads in the rcu_torture_barrier_cbs() are improperly
synchronized, which can cause the rcu_barrier() tests to hang.  The
failure mode is as follows:

1.	CPU 0 running in rcu_torture_barrier() sets barrier_cbs_count
    	to n_barrier_cbs.

2.	CPU 1 running in rcu_torture_barrier_cbs() wakes up, posts
    	its RCU callback, and atomically decrements barrier_cbs_count.
    	Because barrier_cbs_count is not zero, it does not do the wake_up().

3.	CPU 2 running in rcu_torture_barrier_cbs() wakes up, but
    	finds that barrier_cbs_count is not equal to n_barrier_cbs,
    	and so returns to sleep.

4.	The value of barrier_cbs_count therefore never reaches zero,
    	which causes the test to hang.

This commit therefore uses a phase variable to coordinate the test,
preventing this scenario from occurring.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutorture.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 7b6935e0cee3..f7fe73e59c9f 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -206,6 +206,7 @@ static unsigned long boost_starttime;	/* jiffies of next boost test start. */
 DEFINE_MUTEX(boost_mutex);		/* protect setting boost_starttime */
 					/*  and boost task create/destroy. */
 static atomic_t barrier_cbs_count;	/* Barrier callbacks registered. */
+static bool barrier_phase;		/* Test phase. */
 static atomic_t barrier_cbs_invoked;	/* Barrier callbacks invoked. */
 static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
 static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
@@ -1642,6 +1643,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu)
 static int rcu_torture_barrier_cbs(void *arg)
 {
 	long myid = (long)arg;
+	bool lastphase = 0;
 	struct rcu_head rcu;
 
 	init_rcu_head_on_stack(&rcu);
@@ -1649,9 +1651,11 @@ static int rcu_torture_barrier_cbs(void *arg)
 	set_user_nice(current, 19);
 	do {
 		wait_event(barrier_cbs_wq[myid],
-			   atomic_read(&barrier_cbs_count) == n_barrier_cbs ||
+			   barrier_phase != lastphase ||
 			   kthread_should_stop() ||
 			   fullstop != FULLSTOP_DONTSTOP);
+		lastphase = barrier_phase;
+		smp_mb(); /* ensure barrier_phase load before ->call(). */
 		if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
 			break;
 		cur_ops->call(&rcu, rcu_torture_barrier_cbf);
@@ -1676,7 +1680,8 @@ static int rcu_torture_barrier(void *arg)
 	do {
 		atomic_set(&barrier_cbs_invoked, 0);
 		atomic_set(&barrier_cbs_count, n_barrier_cbs);
-		/* wake_up() path contains the required barriers. */
+		smp_mb(); /* Ensure barrier_phase after prior assignments. */
+		barrier_phase = !barrier_phase;
 		for (i = 0; i < n_barrier_cbs; i++)
 			wake_up(&barrier_cbs_wq[i]);
 		wait_event(barrier_wq,
-- 
cgit v1.2.3


From 143aa672f4fc643420c8325ad09c379ed33a27cf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 24 May 2012 18:17:48 -0700
Subject: rcu: Fix diagnostic-printk typo in rcutorture

The rcu_torture_barrier() function has a copy-and-paste typo in the
string passed to rcutorture_shutdown_absorb(), which this commit fixes.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutorture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index f7fe73e59c9f..045a3dc233ea 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -1700,7 +1700,7 @@ static int rcu_torture_barrier(void *arg)
 		schedule_timeout_interruptible(HZ / 10);
 	} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
 	VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
-	rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
+	rcutorture_shutdown_absorb("rcu_torture_barrier");
 	while (!kthread_should_stop())
 		schedule_timeout_interruptible(1);
 	return 0;
-- 
cgit v1.2.3


From 72472a02a9c4507ef54d03d71bb253c26015f52c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Tue, 29 May 2012 17:50:51 -0700
Subject: rcu: Make rcutorture fakewriters invoke rcu_barrier()

The current rcutorture rcu_barrier() testing never intentionally runs
more than one instance of rcu_barrier() at a given time.  This fails
to test the the shiny new concurrency features of rcu_barrier().  This
commit therefore modifies the rcutorture fakewriter kthread to randomly
invoke rcu_barrier() rather than the usual synchronize_rcu().

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutorture.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 045a3dc233ea..c279ee920947 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -1025,7 +1025,11 @@ rcu_torture_fakewriter(void *arg)
 	do {
 		schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
 		udelay(rcu_random(&rand) & 0x3ff);
-		cur_ops->sync();
+		if (cur_ops->cb_barrier != NULL &&
+		    rcu_random(&rand) % (nfakewriters * 8) == 0)
+			cur_ops->cb_barrier();
+		else
+			cur_ops->sync();
 		rcu_stutter_wait("rcu_torture_fakewriter");
 	} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
 
-- 
cgit v1.2.3


From 285fe29481d865ae381ad3924c80894e6968c2d8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Wed, 9 May 2012 08:45:12 -0700
Subject: rcu: Fix detection of abruptly-ending stall

The code that attempts to identify stalls that end just as we detect
them is broken by both flavors of initialization failure.  This commit
therefore properly initializes and computes the count of the number
of reasons why the RCU grace period is stalled.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4b97bba7396e..dc8c5284fe06 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -733,7 +733,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 	int cpu;
 	long delta;
 	unsigned long flags;
-	int ndetected;
+	int ndetected = 0;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	/* Only let one CPU complain about others per time interval. */
@@ -774,7 +774,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 	 */
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irqsave(&rnp->lock, flags);
-	ndetected = rcu_print_task_stall(rnp);
+	ndetected += rcu_print_task_stall(rnp);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
 	print_cpu_stall_info_end();
-- 
cgit v1.2.3


From 3f5d3ea64f1783f0d4ea0d35890ae3297f045a8b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Wed, 9 May 2012 15:39:56 -0700
Subject: rcu: Consolidate duplicate callback-list initialization

There are a couple of open-coded initializations of the rcu_data
structure's RCU callback list.  This commit therefore consolidates
them into a new init_callback_list() function.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index dc8c5284fe06..81e0394e46af 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -936,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
 	return ret;
 }
 
+/*
+ * Initialize the specified rcu_data structure's callback list to empty.
+ */
+static void init_callback_list(struct rcu_data *rdp)
+{
+	int i;
+
+	rdp->nxtlist = NULL;
+	for (i = 0; i < RCU_NEXT_SIZE; i++)
+		rdp->nxttail[i] = &rdp->nxtlist;
+}
+
 /*
  * Advance this CPU's callbacks, but only if the current grace period
  * has ended.  This may be called only from the CPU to whom the rdp
@@ -1328,8 +1340,6 @@ static void
 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 			  struct rcu_node *rnp, struct rcu_data *rdp)
 {
-	int i;
-
 	/*
 	 * Orphan the callbacks.  First adjust the counts.  This is safe
 	 * because ->onofflock excludes _rcu_barrier()'s adoption of
@@ -1369,9 +1379,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 	}
 
 	/* Finally, initialize the rcu_data structure's list to empty.  */
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
+	init_callback_list(rdp);
 }
 
 /*
@@ -2407,16 +2415,13 @@ static void __init
 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 {
 	unsigned long flags;
-	int i;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	/* Set up local state, ensuring consistent view of global state. */
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
+	init_callback_list(rdp);
 	rdp->qlen_lazy = 0;
 	rdp->qlen = 0;
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
-- 
cgit v1.2.3


From 1d1fb395f6dbc07b36285bbedcf01a73b57f7cb5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Wed, 9 May 2012 15:44:42 -0700
Subject: rcu: Add ACCESS_ONCE() to ->qlen accesses

The _rcu_barrier() function accesses other CPUs' rcu_data structure's
->qlen field without benefit of locking.  This commit therefore adds
the required ACCESS_ONCE() wrappers around accesses and updates that
need it.

ACCESS_ONCE() is not needed when a CPU accesses its own ->qlen, or
in code that cannot run while _rcu_barrier() is sampling ->qlen fields.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 81e0394e46af..89addada3e3a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1350,7 +1350,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 		rsp->qlen += rdp->qlen;
 		rdp->n_cbs_orphaned += rdp->qlen;
 		rdp->qlen_lazy = 0;
-		rdp->qlen = 0;
+		ACCESS_ONCE(rdp->qlen) = 0;
 	}
 
 	/*
@@ -1600,7 +1600,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	}
 	smp_mb(); /* List handling before counting for rcu_barrier(). */
 	rdp->qlen_lazy -= count_lazy;
-	rdp->qlen -= count;
+	ACCESS_ONCE(rdp->qlen) -= count;
 	rdp->n_cbs_invoked += count;
 
 	/* Reinstate batch limit if we have worked down the excess. */
@@ -1889,7 +1889,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
-	rdp->qlen++;
+	ACCESS_ONCE(rdp->qlen)++;
 	if (lazy)
 		rdp->qlen_lazy++;
 	else
@@ -2423,7 +2423,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
 	init_callback_list(rdp);
 	rdp->qlen_lazy = 0;
-	rdp->qlen = 0;
+	ACCESS_ONCE(rdp->qlen) = 0;
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
 	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
-- 
cgit v1.2.3


From 172708d002e0a2aca032b04fe6f2b8525c29244a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 16 May 2012 15:23:45 -0700
Subject: rcu: Add a gcc-style structure initializer for RCU pointers

RCU_INIT_POINTER() returns a value that is never used, and which should
be abolished due to terminal ugliness:

	q = RCU_INIT_POINTER(global_p, p);

However, there are two uses that cannot be handled by a do-while
formulation because they do gcc-style initialization:

	RCU_INIT_POINTER(.real_cred, &init_cred),
	RCU_INIT_POINTER(.cred, &init_cred),

This usage is clever, but not necessarily the nicest approach.
This commit therefore creates an RCU_POINTER_INITIALIZER() macro that
is specifically designed for gcc-style initialization.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: David Howells <dhowells@redhat.com>
---
 include/linux/rcupdate.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9cac722b169c..ffe24c09e53d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -905,6 +905,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define RCU_INIT_POINTER(p, v) \
 		p = (typeof(*v) __force __rcu *)(v)
 
+/**
+ * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer
+ *
+ * GCC-style initialization for an RCU-protected pointer in a structure field.
+ */
+#define RCU_POINTER_INITIALIZER(p, v) \
+		.p = (typeof(*v) __force __rcu *)(v)
+
 static __always_inline bool __is_kfree_rcu_offset(unsigned long offset)
 {
 	return offset < 4096;
-- 
cgit v1.2.3


From d36cc701b28983ea2c8d80afe68aae5452aea3bf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 16 May 2012 15:33:15 -0700
Subject: rcu: Use new RCU_POINTER_INITIALIZER for gcc-style initializations

This commit applies the INIT_RCU_POINTER() macro to all uses of
RCU_POINTER_INITIALIZER() that were all too cleverly creating gcc-style
initializations.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: David Howells <dhowells@redhat.com>
---
 include/linux/init_task.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9e65eff6af3b..8a7476186990 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -168,8 +168,8 @@ extern struct cred init_cred;
 	.children	= LIST_HEAD_INIT(tsk.children),			\
 	.sibling	= LIST_HEAD_INIT(tsk.sibling),			\
 	.group_leader	= &tsk,						\
-	RCU_INIT_POINTER(.real_cred, &init_cred),			\
-	RCU_INIT_POINTER(.cred, &init_cred),				\
+	RCU_POINTER_INITIALIZER(real_cred, &init_cred),			\
+	RCU_POINTER_INITIALIZER(cred, &init_cred),			\
 	.comm		= INIT_TASK_COMM,				\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
-- 
cgit v1.2.3


From d1b88eb9e3bccaa43fb5d1bde1cbe210b3434731 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 16 May 2012 15:42:30 -0700
Subject: rcu: Remove return value from RCU_INIT_POINTER()

The return value from RCU_INIT_POINTER() is not used, and using it
would be quite ugly, for example:

	q = RCU_INIT_POINTER(global_p, p);

To prevent this sort of ugliness from appearing, this commit wraps
RCU_INIT_POINTER() in a do-while loop.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Acked-by: David Howells <dhowells@redhat.com>
---
 include/linux/rcupdate.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index ffe24c09e53d..abf44d89c6ce 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -903,7 +903,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  * the reader-accessible portions of the linked structure.
  */
 #define RCU_INIT_POINTER(p, v) \
-		p = (typeof(*v) __force __rcu *)(v)
+	do { \
+		p = (typeof(*v) __force __rcu *)(v); \
+	} while (0)
 
 /**
  * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer
-- 
cgit v1.2.3


From e5c1f444d28b1a9eaf9c3927041db0414f684ef4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 16 May 2012 16:31:38 -0700
Subject: key: Remove extraneous parentheses from rcu_assign_keypointer()

This commit removes the extraneous parentheses from rcu_assign_keypointer()
so that rcu_assign_pointer() can be wrapped in do-while.  It also wraps
rcu_assign_keypointer() in a do-while and parenthesizes its final argument,
as suggested by David Howells.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/key.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/key.h b/include/linux/key.h
index 4cd22ed627ef..cef3b315ba7c 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -303,7 +303,9 @@ static inline bool key_is_instantiated(const struct key *key)
 				   rwsem_is_locked(&((struct key *)(KEY))->sem)))
 
 #define rcu_assign_keypointer(KEY, PAYLOAD)				\
-	(rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD))
+do {									\
+	rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD));		\
+} while (0)
 
 #ifdef CONFIG_SYSCTL
 extern ctl_table key_sysctls[];
-- 
cgit v1.2.3


From e9023c4061054cbf59c5288068118a4d0b152f01 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 16 May 2012 15:51:08 -0700
Subject: rcu: Remove return value from rcu_assign_pointer()

The return value from rcu_assign_pointer() is not used, and using it
would be quite ugly, for example:

	q = rcu_assign_pointer(global_p, p);

To prevent this sort of ugliness from spreading, this commit wraps
rcu_assign_pointer() in a do-while loop.

Reported-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Reported-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index abf44d89c6ce..fb8e6db511d7 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -513,10 +513,10 @@ static inline void rcu_preempt_sleep_check(void)
 		(_________p1); \
 	})
 #define __rcu_assign_pointer(p, v, space) \
-	({ \
+	do { \
 		smp_wmb(); \
 		(p) = (typeof(*v) __force space *)(v); \
-	})
+	} while (0)
 
 
 /**
@@ -851,7 +851,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  *
  * Assigns the specified value to the specified RCU-protected
  * pointer, ensuring that any concurrent RCU readers will see
- * any prior initialization.  Returns the value assigned.
+ * any prior initialization.
  *
  * Inserts memory barriers on architectures that require them
  * (which is most of them), and also prevents the compiler from
-- 
cgit v1.2.3


From 2a3fa843b555d202e682bf08c65ee1a4a93c79cf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Mon, 21 May 2012 11:58:36 -0700
Subject: rcu: Consolidate tree/tiny __rcu_read_{,un}lock() implementations

The CONFIG_TREE_PREEMPT_RCU and CONFIG_TINY_PREEMPT_RCU versions of
__rcu_read_lock() and __rcu_read_unlock() are identical, so this commit
consolidates them into kernel/rcupdate.h.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h |  1 +
 kernel/rcupdate.c        | 44 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/rcutiny_plugin.h  | 47 +----------------------------------------------
 kernel/rcutree_plugin.h  | 47 +----------------------------------------------
 4 files changed, 47 insertions(+), 92 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index fb8e6db511d7..698555ebf49b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -147,6 +147,7 @@ extern void synchronize_sched(void);
 
 extern void __rcu_read_lock(void);
 extern void __rcu_read_unlock(void);
+extern void rcu_read_unlock_special(struct task_struct *t);
 void synchronize_rcu(void);
 
 /*
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 95cba41ce1e9..4e6a61b15e86 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -53,6 +53,50 @@
 
 #ifdef CONFIG_PREEMPT_RCU
 
+/*
+ * Preemptible RCU implementation for rcu_read_lock().
+ * Just increment ->rcu_read_lock_nesting, shared state will be updated
+ * if we block.
+ */
+void __rcu_read_lock(void)
+{
+	current->rcu_read_lock_nesting++;
+	barrier();  /* critical section after entry code. */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+
+/*
+ * Preemptible RCU implementation for rcu_read_unlock().
+ * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
+ * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
+ * invoke rcu_read_unlock_special() to clean up after a context switch
+ * in an RCU read-side critical section and other special cases.
+ */
+void __rcu_read_unlock(void)
+{
+	struct task_struct *t = current;
+
+	if (t->rcu_read_lock_nesting != 1) {
+		--t->rcu_read_lock_nesting;
+	} else {
+		barrier();  /* critical section before exit code. */
+		t->rcu_read_lock_nesting = INT_MIN;
+		barrier();  /* assign before ->rcu_read_unlock_special load */
+		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+			rcu_read_unlock_special(t);
+		barrier();  /* ->rcu_read_unlock_special load before assign */
+		t->rcu_read_lock_nesting = 0;
+	}
+#ifdef CONFIG_PROVE_LOCKING
+	{
+		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+
+		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+	}
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+
 /*
  * Check for a task exiting while in a preemptible-RCU read-side
  * critical section, clean up if so.  No need to issue warnings,
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index fc31a2d65100..a269b0da0eb6 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
 	RCU_TRACE(.rcb.name = "rcu_preempt")
 };
 
-static void rcu_read_unlock_special(struct task_struct *t);
 static int rcu_preempted_readers_exp(void);
 static void rcu_report_exp_done(void);
 
@@ -526,24 +525,12 @@ void rcu_preempt_note_context_switch(void)
 	local_irq_restore(flags);
 }
 
-/*
- * Tiny-preemptible RCU implementation for rcu_read_lock().
- * Just increment ->rcu_read_lock_nesting, shared state will be updated
- * if we block.
- */
-void __rcu_read_lock(void)
-{
-	current->rcu_read_lock_nesting++;
-	barrier();  /* needed if we ever invoke rcu_read_lock in rcutiny.c */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_lock);
-
 /*
  * Handle special cases during rcu_read_unlock(), such as needing to
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-static noinline void rcu_read_unlock_special(struct task_struct *t)
+void rcu_read_unlock_special(struct task_struct *t)
 {
 	int empty;
 	int empty_exp;
@@ -626,38 +613,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
 	local_irq_restore(flags);
 }
 
-/*
- * Tiny-preemptible RCU implementation for rcu_read_unlock().
- * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
- * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
- * invoke rcu_read_unlock_special() to clean up after a context switch
- * in an RCU read-side critical section and other special cases.
- */
-void __rcu_read_unlock(void)
-{
-	struct task_struct *t = current;
-
-	barrier();  /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
-	if (t->rcu_read_lock_nesting != 1)
-		--t->rcu_read_lock_nesting;
-	else {
-		t->rcu_read_lock_nesting = INT_MIN;
-		barrier();  /* assign before ->rcu_read_unlock_special load */
-		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
-			rcu_read_unlock_special(t);
-		barrier();  /* ->rcu_read_unlock_special load before assign */
-		t->rcu_read_lock_nesting = 0;
-	}
-#ifdef CONFIG_PROVE_LOCKING
-	{
-		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
-
-		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
-	}
-#endif /* #ifdef CONFIG_PROVE_LOCKING */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_unlock);
-
 /*
  * Check for a quiescent state from the current CPU.  When a task blocks,
  * the task is recorded in the rcu_preempt_ctrlblk structure, which is
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3e4899459f3d..4b6b17cdf66b 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -78,7 +78,6 @@ struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
 static struct rcu_state *rcu_state = &rcu_preempt_state;
 
-static void rcu_read_unlock_special(struct task_struct *t);
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
 
 /*
@@ -232,18 +231,6 @@ static void rcu_preempt_note_context_switch(int cpu)
 	local_irq_restore(flags);
 }
 
-/*
- * Tree-preemptible RCU implementation for rcu_read_lock().
- * Just increment ->rcu_read_lock_nesting, shared state will be updated
- * if we block.
- */
-void __rcu_read_lock(void)
-{
-	current->rcu_read_lock_nesting++;
-	barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_lock);
-
 /*
  * Check for preempted RCU readers blocking the current grace period
  * for the specified rcu_node structure.  If the caller needs a reliable
@@ -310,7 +297,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-static noinline void rcu_read_unlock_special(struct task_struct *t)
+void rcu_read_unlock_special(struct task_struct *t)
 {
 	int empty;
 	int empty_exp;
@@ -418,38 +405,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
 	}
 }
 
-/*
- * Tree-preemptible RCU implementation for rcu_read_unlock().
- * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
- * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
- * invoke rcu_read_unlock_special() to clean up after a context switch
- * in an RCU read-side critical section and other special cases.
- */
-void __rcu_read_unlock(void)
-{
-	struct task_struct *t = current;
-
-	if (t->rcu_read_lock_nesting != 1)
-		--t->rcu_read_lock_nesting;
-	else {
-		barrier();  /* critical section before exit code. */
-		t->rcu_read_lock_nesting = INT_MIN;
-		barrier();  /* assign before ->rcu_read_unlock_special load */
-		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
-			rcu_read_unlock_special(t);
-		barrier();  /* ->rcu_read_unlock_special load before assign */
-		t->rcu_read_lock_nesting = 0;
-	}
-#ifdef CONFIG_PROVE_LOCKING
-	{
-		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
-
-		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
-	}
-#endif /* #ifdef CONFIG_PROVE_LOCKING */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_unlock);
-
 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
 
 /*
-- 
cgit v1.2.3


From 28f5c693d03530eb15c5354f758b789189b98c37 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 25 May 2012 14:25:58 -0700
Subject: rcu: Remove function versions of __kfree_rcu and
 __is_kfree_rcu_offset

Commit d8169d4c (Make __kfree_rcu() less dependent on compiler choices)
added cpp macro versions of __kfree_rcu() and __is_kfree_rcu_offset(),
but failed to remove the old inline-function versions.  This commit does
this cleanup.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 698555ebf49b..31568c734525 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -916,24 +916,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define RCU_POINTER_INITIALIZER(p, v) \
 		.p = (typeof(*v) __force __rcu *)(v)
 
-static __always_inline bool __is_kfree_rcu_offset(unsigned long offset)
-{
-	return offset < 4096;
-}
-
-static __always_inline
-void __kfree_rcu(struct rcu_head *head, unsigned long offset)
-{
-	typedef void (*rcu_callback)(struct rcu_head *);
-
-	BUILD_BUG_ON(!__builtin_constant_p(offset));
-
-	/* See the kfree_rcu() header comment. */
-	BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
-
-	kfree_call_rcu(head, (rcu_callback)offset);
-}
-
 /*
  * Does the specified offset indicate that the corresponding rcu_head
  * structure can be handled by kfree_rcu()?
-- 
cgit v1.2.3


From 62fde6edf12b60fddb13a3f0a779c8be0bb7447e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Tue, 22 May 2012 22:10:24 -0700
Subject: rcu: Make __call_rcu() handle invocation from idle

Although __call_rcu() is handled correctly when called from a momentary
non-idle period, if it is called on a CPU that RCU believes to be idle
on RCU_FAST_NO_HZ kernels, the callback might be indefinitely postponed.
This commit therefore ensures that RCU is aware of the new callback and
has a chance to force the CPU out of dyntick-idle mode when a new callback
is posted.

Reported-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 13 ++++---------
 kernel/rcutree.c         | 15 +++++++++------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 31568c734525..26f6417f0264 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -256,6 +256,10 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
 }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP)
+extern int rcu_is_cpu_idle(void);
+#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */
+
 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
 bool rcu_lockdep_current_cpu_online(void);
 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
@@ -267,15 +271,6 @@ static inline bool rcu_lockdep_current_cpu_online(void)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
-#ifdef CONFIG_PROVE_RCU
-extern int rcu_is_cpu_idle(void);
-#else /* !CONFIG_PROVE_RCU */
-static inline int rcu_is_cpu_idle(void)
-{
-	return 0;
-}
-#endif /* else !CONFIG_PROVE_RCU */
-
 static inline void rcu_lock_acquire(struct lockdep_map *map)
 {
 	lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 89addada3e3a..a4a9c916ad36 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -585,8 +585,6 @@ void rcu_nmi_exit(void)
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
-#ifdef CONFIG_PROVE_RCU
-
 /**
  * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
  *
@@ -604,7 +602,7 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
  * Is the current CPU online?  Disable preemption to avoid false positives
@@ -645,9 +643,7 @@ bool rcu_lockdep_current_cpu_online(void)
 }
 EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
 
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
-#endif /* #ifdef CONFIG_PROVE_RCU */
+#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
 
 /**
  * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
@@ -1904,6 +1900,13 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	else
 		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
 
+	/*
+	 * If called from an extended quiescent state, invoke the RCU
+	 * core in order to force a re-evaluation of RCU's idleness.
+	 */
+	if (rcu_is_cpu_idle())
+		invoke_rcu_core();
+
 	/* If interrupts were disabled, don't dive into RCU core. */
 	if (irqs_disabled_flags(flags)) {
 		local_irq_restore(flags);
-- 
cgit v1.2.3


From a16b7a693430406dc229ab0c6b154f669a2031c5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Sat, 26 May 2012 08:56:01 -0700
Subject: rcu: Prevent __call_rcu() from invoking RCU core on offline CPUs

The __call_rcu() function will invoke the RCU core, for example, if
it detects that the current CPU has too many callbacks.  However, this
can happen on an offline CPU that is on its way to the idle loop, in
which case it is an error to invoke the RCU core, and the excess callbacks
will be adopted in any case.  This commit therefore adds checks to
__call_rcu() for running on an offline CPU, refraining from invoking
the RCU core in this case.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index a4a9c916ad36..ceaa95923a87 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1904,11 +1904,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	 * If called from an extended quiescent state, invoke the RCU
 	 * core in order to force a re-evaluation of RCU's idleness.
 	 */
-	if (rcu_is_cpu_idle())
+	if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
 		invoke_rcu_core();
 
-	/* If interrupts were disabled, don't dive into RCU core. */
-	if (irqs_disabled_flags(flags)) {
+	/* If interrupts were disabled or CPU offline, don't invoke RCU core. */
+	if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) {
 		local_irq_restore(flags);
 		return;
 	}
-- 
cgit v1.2.3


From 29154c57e35a191c83b19c61b1935c9f21957662 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 30 May 2012 03:21:48 -0700
Subject: rcu: Split RCU core processing out of __call_rcu()

The __call_rcu() function is a bit overweight, so this commit splits
it into actual enqueuing of and accounting for the callback (__call_rcu())
and associated RCU-core processing (__call_rcu_core()).

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 90 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 49 insertions(+), 41 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ceaa95923a87..70c4da7d2a97 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1861,45 +1861,12 @@ static void invoke_rcu_core(void)
 	raise_softirq(RCU_SOFTIRQ);
 }
 
-static void
-__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
-	   struct rcu_state *rsp, bool lazy)
+/*
+ * Handle any core-RCU processing required by a call_rcu() invocation.
+ */
+static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
+			    struct rcu_head *head, unsigned long flags)
 {
-	unsigned long flags;
-	struct rcu_data *rdp;
-
-	WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
-	debug_rcu_head_queue(head);
-	head->func = func;
-	head->next = NULL;
-
-	smp_mb(); /* Ensure RCU update seen before callback registry. */
-
-	/*
-	 * Opportunistically note grace-period endings and beginnings.
-	 * Note that we might see a beginning right after we see an
-	 * end, but never vice versa, since this CPU has to pass through
-	 * a quiescent state betweentimes.
-	 */
-	local_irq_save(flags);
-	rdp = this_cpu_ptr(rsp->rda);
-
-	/* Add the callback to our list. */
-	ACCESS_ONCE(rdp->qlen)++;
-	if (lazy)
-		rdp->qlen_lazy++;
-	else
-		rcu_idle_count_callbacks_posted();
-	smp_mb();  /* Count before adding callback for rcu_barrier(). */
-	*rdp->nxttail[RCU_NEXT_TAIL] = head;
-	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
-
-	if (__is_kfree_rcu_offset((unsigned long)func))
-		trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
-					 rdp->qlen_lazy, rdp->qlen);
-	else
-		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
-
 	/*
 	 * If called from an extended quiescent state, invoke the RCU
 	 * core in order to force a re-evaluation of RCU's idleness.
@@ -1908,10 +1875,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 		invoke_rcu_core();
 
 	/* If interrupts were disabled or CPU offline, don't invoke RCU core. */
-	if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) {
-		local_irq_restore(flags);
+	if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
 		return;
-	}
 
 	/*
 	 * Force the grace period if too many callbacks or too long waiting.
@@ -1944,6 +1909,49 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 		}
 	} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
 		force_quiescent_state(rsp, 1);
+}
+
+static void
+__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
+	   struct rcu_state *rsp, bool lazy)
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+
+	WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
+	debug_rcu_head_queue(head);
+	head->func = func;
+	head->next = NULL;
+
+	smp_mb(); /* Ensure RCU update seen before callback registry. */
+
+	/*
+	 * Opportunistically note grace-period endings and beginnings.
+	 * Note that we might see a beginning right after we see an
+	 * end, but never vice versa, since this CPU has to pass through
+	 * a quiescent state betweentimes.
+	 */
+	local_irq_save(flags);
+	rdp = this_cpu_ptr(rsp->rda);
+
+	/* Add the callback to our list. */
+	ACCESS_ONCE(rdp->qlen)++;
+	if (lazy)
+		rdp->qlen_lazy++;
+	else
+		rcu_idle_count_callbacks_posted();
+	smp_mb();  /* Count before adding callback for rcu_barrier(). */
+	*rdp->nxttail[RCU_NEXT_TAIL] = head;
+	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
+
+	if (__is_kfree_rcu_offset((unsigned long)func))
+		trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
+					 rdp->qlen_lazy, rdp->qlen);
+	else
+		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
+
+	/* Go handle any RCU core processing required. */
+	__call_rcu_core(rsp, rdp, head, flags);
 	local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3


From d7118175cce7e76b3292b60832813ef1f28b6523 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Wed, 6 Jun 2012 07:12:43 -0700
Subject: rcu: Fix rcu_is_cpu_idle() #ifdef in TINY_RCU

The rcu_is_cpu_idle() function is used if CONFIG_DEBUG_LOCK_ALLOC,
but TINY_RCU defines it only when CONFIG_PROVE_RCU.  This causes
build failures when CONFIG_DEBUG_LOCK_ALLOC=y but CONFIG_PROVE_RCU=n.
This commit therefore adjusts the #ifdefs for rcu_is_cpu_idle() so
that it is defined when CONFIG_DEBUG_LOCK_ALLOC=y.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutiny.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 37a5444204d2..547b1fe5b052 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -172,7 +172,7 @@ void rcu_irq_enter(void)
 	local_irq_restore(flags);
 }
 
-#ifdef CONFIG_PROVE_RCU
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 /*
  * Test whether RCU thinks that the current CPU is idle.
@@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
-#endif /* #ifdef CONFIG_PROVE_RCU */
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 /*
  * Test whether the current CPU was interrupted from idle.  Nested
-- 
cgit v1.2.3


From 1c17e4d4437d8045a596d9f06c1558dc09e2b372 Mon Sep 17 00:00:00 2001
From: Carsten Emde <C.Emde@osadl.org>
Date: Tue, 19 Jun 2012 10:43:16 +0200
Subject: rcu: Prevent uninitialized string in RCU CPU stall info

An uninitialized string may be displayed at the end of the rcu_preempt
detected stall info such as

0: (1 GPs behind) idle=075/140000000000000/0 =8?^D=8?^D
                                             ^^^^^^^^^^
if CONFIG_RCU_FAST_NO_HZ is not defined.

This trivial patch clears the string in this case.

Signed-off-by: Carsten Emde <C.Emde@osadl.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree_plugin.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 4b6b17cdf66b..395cdd1e0634 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2224,6 +2224,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 
 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 {
+	*cp = '\0';
 }
 
 #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
-- 
cgit v1.2.3


From 95f0c1de3e6ed4383cc4b5f52ce4ecfb21026b49 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 19 Jun 2012 11:58:27 -0700
Subject: rcu: Disable preemption in rcu_blocking_is_gp()

It is time to optimize CONFIG_TREE_PREEMPT_RCU's synchronize_rcu()
for uniprocessor optimization, which means that rcu_blocking_is_gp()
can no longer rely on RCU read-side critical sections having disabled
preemption.  This commit therefore disables preemption across
rcu_blocking_is_gp()'s scan of the cpu_online_mask.

(Updated from previous version to fix embarrassing bug spotted by
Wu Fengguang.)

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.c | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 70c4da7d2a97..e000a623e635 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1981,28 +1981,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
  * occasionally incorrectly indicate that there are multiple CPUs online
  * when there was in fact only one the whole time, as this just adds
  * some overhead: RCU still operates correctly.
- *
- * Of course, sampling num_online_cpus() with preemption enabled can
- * give erroneous results if there are concurrent CPU-hotplug operations.
- * For example, given a demonic sequence of preemptions in num_online_cpus()
- * and CPU-hotplug operations, there could be two or more CPUs online at
- * all times, but num_online_cpus() might well return one (or even zero).
- *
- * However, all such demonic sequences require at least one CPU-offline
- * operation.  Furthermore, rcu_blocking_is_gp() giving the wrong answer
- * is only a problem if there is an RCU read-side critical section executing
- * throughout.  But RCU-sched and RCU-bh read-side critical sections
- * disable either preemption or bh, which prevents a CPU from going offline.
- * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
- * that there is only one CPU when in fact there was more than one throughout
- * is when there were no RCU readers in the system.  If there are no
- * RCU readers, the grace period by definition can be of zero length,
- * regardless of the number of online CPUs.
  */
 static inline int rcu_blocking_is_gp(void)
 {
+	int ret;
+
 	might_sleep();  /* Check for RCU read-side critical section. */
-	return num_online_cpus() <= 1;
+	preempt_disable();
+	ret = num_online_cpus() <= 1;
+	preempt_enable();
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3


From cf01537ecf192d9ff15c32a355db5d5af22bea4d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 21 Jun 2012 11:26:42 -0700
Subject: rcu: Add check for CPUs going offline with callbacks queued

If a CPU goes offline with callbacks queued, those callbacks might be
indefinitely postponed, which can result in a system hang.  This commit
therefore inserts warnings for this condition.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e000a623e635..95c7b61e77e4 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1509,6 +1509,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	if (need_report & RCU_OFL_TASKS_EXP_GP)
 		rcu_report_exp_rnp(rsp, rnp, true);
+	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
+		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
+		  cpu, rdp->qlen, rdp->nxtlist);
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
-- 
cgit v1.2.3


From bf1304e9cd755be7814ac8365834b7a1d0f06b58 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Wed, 9 May 2012 15:55:39 -0700
Subject: rcu: Dump only the current CPU's buffers for idle-entry/exit warnings

Problems in RCU idle entry and exit are almost always confined to the
offending CPU.  This commit therefore switches ftrace_dump() from
DUMP_ALL to DUMP_ORIG.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
---
 kernel/rcutree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4b97bba7396e..6eb48f13eeeb 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -358,7 +358,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
 		struct task_struct *idle = idle_task(smp_processor_id());
 
 		trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
-		ftrace_dump(DUMP_ALL);
+		ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
@@ -468,7 +468,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
 
 		trace_rcu_dyntick("Error on exit: not idle task",
 				  oldval, rdtp->dynticks_nesting);
-		ftrace_dump(DUMP_ALL);
+		ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
-- 
cgit v1.2.3


From 28f8555364ef5b54b21251c5c8022109a70626e9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 10 May 2012 15:37:48 -0700
Subject: rcu: The rcu_needs_cpu() function is not a quiescent state

The TINY_PREEMPT_RCU() function rcu_preempt_needs_cpu(), which is called
from rcu_needs_cpu(), assumes that it is in a quiescent state with respect
to the CPU.  This is no longer the case.  This commit therefore updates
rcu_preempt_needs_cpu() to make it aware that it is not running in a
quiescent state.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
---
 kernel/rcutiny_plugin.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index fc31a2d65100..849ede51c12d 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -846,8 +846,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  */
 int rcu_preempt_needs_cpu(void)
 {
-	if (!rcu_preempt_running_reader())
-		rcu_preempt_cpu_qs();
 	return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
 }
 
-- 
cgit v1.2.3


From e84c48ae3024ac2f14ed1c3671e5ea37c60fb838 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Mon, 4 Jun 2012 20:45:10 -0700
Subject: rcu: Round FAST_NO_HZ lazy timeout to nearest second

Currently, if several CPUs in the same package have all lazy RCU
callbacks, their wakeups will be uncorrelated.  If all the CPUs are in the
same power domain (as is often the case), this will result in unnecessary
power-ups of the package.  This commit therefore uses round_jiffies()
to round the timeouts to a second boundary, increasing the odds that
they can be coalesced with each other or with other timeouts.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree_plugin.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3e4899459f3d..c28d25522a81 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1968,7 +1968,7 @@ static void rcu_idle_count_callbacks_posted(void)
  */
 #define RCU_IDLE_FLUSHES 5		/* Number of dyntick-idle tries. */
 #define RCU_IDLE_OPT_FLUSHES 3		/* Optional dyntick-idle tries. */
-#define RCU_IDLE_GP_DELAY 6		/* Roughly one grace period. */
+#define RCU_IDLE_GP_DELAY 4		/* Roughly one grace period. */
 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */
 
 /*
@@ -2047,10 +2047,13 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 		return 1;
 	}
 	/* Set up for the possibility that RCU will post a timer. */
-	if (rcu_cpu_has_nonlazy_callbacks(cpu))
-		*delta_jiffies = RCU_IDLE_GP_DELAY;
-	else
-		*delta_jiffies = RCU_IDLE_LAZY_GP_DELAY;
+	if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+		*delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
+					  RCU_IDLE_GP_DELAY) - jiffies;
+	} else {
+		*delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
+		*delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
+	}
 	return 0;
 }
 
@@ -2187,10 +2190,11 @@ static void rcu_prepare_for_idle(int cpu)
 		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
 			trace_rcu_prep_idle("Dyntick with callbacks");
 			rdtp->idle_gp_timer_expires =
-					   jiffies + RCU_IDLE_GP_DELAY;
+				round_up(jiffies + RCU_IDLE_GP_DELAY,
+					 RCU_IDLE_GP_DELAY);
 		} else {
 			rdtp->idle_gp_timer_expires =
-					   jiffies + RCU_IDLE_LAZY_GP_DELAY;
+				round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
 			trace_rcu_prep_idle("Dyntick with lazy callbacks");
 		}
 		tp = &rdtp->idle_gp_timer;
-- 
cgit v1.2.3


From 4fa3b6cb1bc8c14b81b4c8ffdfd3f2500a7e9367 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Tue, 5 Jun 2012 15:53:53 -0700
Subject: rcu: Fix qlen_lazy breakage

Commit d8169d4c (Make __kfree_rcu() less dependent on compiler choices)
created a macro out of an inline function in order to avoid build
breakage for certain combinations of gcc flags.  Unfortunately, it also
converted a kfree_call_rcu() to a call_rcu(), which made the rcu_data
structure's ->qlen_lazy field lose counts.  This commit therefore changes
the call_rcu() back to kfree_call_rcu().

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9cac722b169c..46d45e0f9134 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -935,7 +935,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
 #define __kfree_rcu(head, offset) \
 	do { \
 		BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
-		call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
+		kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
 	} while (0)
 
 /**
-- 
cgit v1.2.3


From 9d2ad24306f2fafc3612e5a216aab31f9e56e879 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Sun, 24 Jun 2012 10:15:02 -0700
Subject: rcu: Make RCU_FAST_NO_HZ respect nohz= boot parameter

If the nohz= boot parameter disables nohz, then RCU_FAST_NO_HZ needs to
also disable itself.  This commit therefore checks for tick_nohz_enabled
being zero, disabling rcu_prepare_for_idle() if so.  This commit assumes
that tick_nohz_enabled can change at runtime: If this is not the case,
then a simpler approach suffices.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.h         |  1 +
 kernel/rcutree_plugin.h  | 15 +++++++++++++++
 kernel/time/tick-sched.c |  2 +-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 19b61ac1079f..e978845c6800 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -97,6 +97,7 @@ struct rcu_dynticks {
 				    /* # times non-lazy CBs posted to CPU. */
 	unsigned long nonlazy_posted_snap;
 				    /* idle-period nonlazy_posted snapshot. */
+	int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 };
 
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c28d25522a81..3508000e077d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1971,6 +1971,8 @@ static void rcu_idle_count_callbacks_posted(void)
 #define RCU_IDLE_GP_DELAY 4		/* Roughly one grace period. */
 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */
 
+extern int tick_nohz_enabled;
+
 /*
  * Does the specified flavor of RCU have non-lazy callbacks pending on
  * the specified CPU?  Both RCU flavor and CPU are specified by the
@@ -2112,6 +2114,7 @@ static void rcu_cleanup_after_idle(int cpu)
 
 	del_timer(&rdtp->idle_gp_timer);
 	trace_rcu_prep_idle("Cleanup after idle");
+	rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
 }
 
 /*
@@ -2137,6 +2140,18 @@ static void rcu_prepare_for_idle(int cpu)
 {
 	struct timer_list *tp;
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+	int tne;
+
+	/* Handle nohz enablement switches conservatively. */
+	tne = ACCESS_ONCE(tick_nohz_enabled);
+	if (tne != rdtp->tick_nohz_enabled_snap) {
+		if (rcu_cpu_has_callbacks(cpu))
+			invoke_rcu_core(); /* force nohz to see update. */
+		rdtp->tick_nohz_enabled_snap = tne;
+		return;
+	}
+	if (!tne)
+		return;
 
 	/*
 	 * If this is an idle re-entry, for example, due to use of
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 869997833928..66ff07f6184c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void)
 /*
  * NO HZ enabled ?
  */
-static int tick_nohz_enabled __read_mostly  = 1;
+int tick_nohz_enabled __read_mostly  = 1;
 
 /*
  * Enable / Disable tickless mode
-- 
cgit v1.2.3


From ea8e867d9b505b1c852f9932ba75504bc4eebeaa Mon Sep 17 00:00:00 2001
From: Jayachandran C <jayachandranc@netlogicmicro.com>
Date: Tue, 8 May 2012 18:11:55 +0530
Subject: MIPS: Netlogic: Fix PCIX irq on XLR chips

The correct irq is PIC_PCIX_IRQ

Signed-off-by: Jayachandran C <jayachandranc@netlogicmicro.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3750/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pci/pci-xlr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
index 1644805a6730..50ff4dc28b4f 100644
--- a/arch/mips/pci/pci-xlr.c
+++ b/arch/mips/pci/pci-xlr.c
@@ -327,7 +327,7 @@ static int __init pcibios_init(void)
 		}
 	} else {
 		/* XLR PCI controller ACK */
-		irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ, xlr_pci_ack);
+		irq_set_handler_data(PIC_PCIX_IRQ, xlr_pci_ack);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 249e2a38fbd28ffaadba112290742ada16946900 Mon Sep 17 00:00:00 2001
From: Ganesan Ramalingam <ganesanr@broadcom.com>
Date: Tue, 8 May 2012 18:11:56 +0530
Subject: MIPS: Netlogic: MSI enable fix for XLS

MSI interrupts do not work on XLS after commit a776c49
( "PCI: msi: Disable msi interrupts when we initialize a pci device" )
because the change disables MSI interrupts on the XLS PCIe bridges
during the PCI enumeration.

Fix this by enabling MSI interrupts on the bridge in the
arch_setup_msi_irq() function. A new function xls_get_pcie_link()
has been introduced to get the PCI device corresponding to the
top level PCIe bridge on which MSI has to be enabled.

Also, update get_irq_vector() to use the new xls_get_pcie_link()
function and PCI_SLOT() macro for determining the IRQ of PCI devices.

Signed-off-by: Ganesan Ramalingam <ganesanr@broadcom.com>
Signed-off-by: Jayachandran C <jayachandranc@netlogicmicro.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3753/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pci/pci-xlr.c | 59 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 50 insertions(+), 9 deletions(-)

diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
index 50ff4dc28b4f..172af1cd5867 100644
--- a/arch/mips/pci/pci-xlr.c
+++ b/arch/mips/pci/pci-xlr.c
@@ -41,6 +41,7 @@
 #include <linux/irq.h>
 #include <linux/irqdesc.h>
 #include <linux/console.h>
+#include <linux/pci_regs.h>
 
 #include <asm/io.h>
 
@@ -156,35 +157,55 @@ struct pci_controller nlm_pci_controller = {
 	.io_offset      = 0x00000000UL,
 };
 
+/*
+ * The top level PCIe links on the XLS PCIe controller appear as
+ * bridges. Given a device, this function finds which link it is
+ * on.
+ */
+static struct pci_dev *xls_get_pcie_link(const struct pci_dev *dev)
+{
+	struct pci_bus *bus, *p;
+
+	/* Find the bridge on bus 0 */
+	bus = dev->bus;
+	for (p = bus->parent; p && p->number != 0; p = p->parent)
+		bus = p;
+
+	return p ? bus->self : NULL;
+}
+
 static int get_irq_vector(const struct pci_dev *dev)
 {
+	struct pci_dev *lnk;
+
 	if (!nlm_chip_is_xls())
-		return	PIC_PCIX_IRQ;	/* for XLR just one IRQ*/
+		return	PIC_PCIX_IRQ;	/* for XLR just one IRQ */
 
 	/*
 	 * For XLS PCIe, there is an IRQ per Link, find out which
 	 * link the device is on to assign interrupts
-	*/
-	if (dev->bus->self == NULL)
+	 */
+	lnk = xls_get_pcie_link(dev);
+	if (lnk == NULL)
 		return 0;
 
-	switch	(dev->bus->self->devfn) {
-	case 0x0:
+	switch	(PCI_SLOT(lnk->devfn)) {
+	case 0:
 		return PIC_PCIE_LINK0_IRQ;
-	case 0x8:
+	case 1:
 		return PIC_PCIE_LINK1_IRQ;
-	case 0x10:
+	case 2:
 		if (nlm_chip_is_xls_b())
 			return PIC_PCIE_XLSB0_LINK2_IRQ;
 		else
 			return PIC_PCIE_LINK2_IRQ;
-	case 0x18:
+	case 3:
 		if (nlm_chip_is_xls_b())
 			return PIC_PCIE_XLSB0_LINK3_IRQ;
 		else
 			return PIC_PCIE_LINK3_IRQ;
 	}
-	WARN(1, "Unexpected devfn %d\n", dev->bus->self->devfn);
+	WARN(1, "Unexpected devfn %d\n", lnk->devfn);
 	return 0;
 }
 
@@ -202,7 +223,27 @@ void arch_teardown_msi_irq(unsigned int irq)
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
 	struct msi_msg msg;
+	struct pci_dev *lnk;
 	int irq, ret;
+	u16 val;
+
+	/* MSI not supported on XLR */
+	if (!nlm_chip_is_xls())
+		return 1;
+
+	/*
+	 * Enable MSI on the XLS PCIe controller bridge which was disabled
+	 * at enumeration, the bridge MSI capability is at 0x50
+	 */
+	lnk = xls_get_pcie_link(dev);
+	if (lnk == NULL)
+		return 1;
+
+	pci_read_config_word(lnk, 0x50 + PCI_MSI_FLAGS, &val);
+	if ((val & PCI_MSI_FLAGS_ENABLE) == 0) {
+		val |= PCI_MSI_FLAGS_ENABLE;
+		pci_write_config_word(lnk, 0x50 + PCI_MSI_FLAGS, val);
+	}
 
 	irq = get_irq_vector(dev);
 	if (irq <= 0)
-- 
cgit v1.2.3


From b876c1a0bcfe68a5eaf03f325cb2a09822bf11f2 Mon Sep 17 00:00:00 2001
From: Jayachandran C <jayachandranc@netlogicmicro.com>
Date: Tue, 3 Jul 2012 19:04:02 +0200
Subject: MIPS: Netlogic: Fix TLB size of boot CPU.

Starting other threads in the core will change the number of
TLB entries of a CPU.  Re-calculate current_cpu_data.tlbsize
on the boot cpu after enabling and waking up other threads.

The secondary CPUs do not need this logic because the threads
are enabled on the secondary cores at wakeup and before cpu_probe.

Signed-off-by: Jayachandran C <jayachandranc@netlogicmicro.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3751/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/netlogic/xlp/setup.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index acb677a1227c..b3df7c2aad1e 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c
@@ -82,8 +82,10 @@ void __init prom_free_prom_memory(void)
 
 void xlp_mmu_init(void)
 {
+	/* enable extended TLB and Large Fixed TLB */
 	write_c0_config6(read_c0_config6() | 0x24);
-	current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1;
+
+	/* set page mask of Fixed TLB in config7 */
 	write_c0_config7(PM_DEFAULT_MASK >>
 		(13 + (ffz(PM_DEFAULT_MASK >> 13) / 2)));
 }
@@ -100,6 +102,10 @@ void __init prom_init(void)
 	nlm_common_ebase = read_c0_ebase() & (~((1 << 12) - 1));
 #ifdef CONFIG_SMP
 	nlm_wakeup_secondary_cpus(0xffffffff);
+
+	/* update TLB size after waking up threads */
+	current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1;
+
 	register_smp_ops(&nlm_smp_ops);
 #endif
 }
-- 
cgit v1.2.3


From e84c282b40251f314c429f39b044785e323f2648 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 22 May 2012 14:45:21 +0900
Subject: tools lib traceevent: Let filtering numbers by string use function
 names

As a pointer can be converted into a function name, let the filters
work with the function name as well as with the pointer number.  If
the comparison expects a string, then convert numbers into functions,
but only when the number is the same size as a long.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-oxsa1qkr2eq7u8d7r0aapedu@git.kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/parse-filter.c | 45 ++++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index dfcfe2c131de..80d872a81f26 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1710,18 +1710,43 @@ static int test_num(struct event_format *event,
 
 static const char *get_field_str(struct filter_arg *arg, struct pevent_record *record)
 {
-	const char *val = record->data + arg->str.field->offset;
+	struct event_format *event;
+	struct pevent *pevent;
+	unsigned long long addr;
+	const char *val = NULL;
+	char hex[64];
 
-	/*
-	 * We need to copy the data since we can't be sure the field
-	 * is null terminated.
-	 */
-	if (*(val + arg->str.field->size - 1)) {
-		/* copy it */
-		memcpy(arg->str.buffer, val, arg->str.field->size);
-		/* the buffer is already NULL terminated */
-		val = arg->str.buffer;
+	/* If the field is not a string convert it */
+	if (arg->str.field->flags & FIELD_IS_STRING) {
+		val = record->data + arg->str.field->offset;
+
+		/*
+		 * We need to copy the data since we can't be sure the field
+		 * is null terminated.
+		 */
+		if (*(val + arg->str.field->size - 1)) {
+			/* copy it */
+			memcpy(arg->str.buffer, val, arg->str.field->size);
+			/* the buffer is already NULL terminated */
+			val = arg->str.buffer;
+		}
+
+	} else {
+		event = arg->str.field->event;
+		pevent = event->pevent;
+		addr = get_value(event, arg->str.field, record);
+
+		if (arg->str.field->flags & (FIELD_IS_POINTER | FIELD_IS_LONG))
+			/* convert to a kernel symbol */
+			val = pevent_find_function(pevent, addr);
+
+		if (val == NULL) {
+			/* just use the hex of the string name */
+			snprintf(hex, 64, "0x%llx", addr);
+			val = hex;
+		}
 	}
+
 	return val;
 }
 
-- 
cgit v1.2.3


From c2e6dc2b268cca44d522b2ee86147f0d30d7e3e4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 15 Nov 2011 18:47:48 -0500
Subject: tools lib traceevent: Add support for "%.*s" in bprintk events

The arg notation of '*' in bprintks is not handled by the parser.
Implement it so that they show up properly in the output and do not
kill the tracer from reporting events.

Reported-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-t0ctq7t1xz3ud6wv4v886jou@git.kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 83f0a8add177..0644c2a23ad6 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3370,7 +3370,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		break;
 	}
 	case PRINT_BSTRING:
-		trace_seq_printf(s, format, arg->string.string);
+		print_str_to_seq(s, format, len_arg, arg->string.string);
 		break;
 	case PRINT_OP:
 		/*
@@ -3471,6 +3471,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 	unsigned long long ip, val;
 	char *ptr;
 	void *bptr;
+	int vsize;
 
 	field = pevent->bprint_buf_field;
 	ip_field = pevent->bprint_ip_field;
@@ -3519,6 +3520,8 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 				goto process_again;
 			case '0' ... '9':
 				goto process_again;
+			case '.':
+				goto process_again;
 			case 'p':
 				ls = 1;
 				/* fall through */
@@ -3526,23 +3529,29 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 			case 'u':
 			case 'x':
 			case 'i':
-				/* the pointers are always 4 bytes aligned */
-				bptr = (void *)(((unsigned long)bptr + 3) &
-						~3);
 				switch (ls) {
 				case 0:
-					ls = 4;
+					vsize = 4;
 					break;
 				case 1:
-					ls = pevent->long_size;
+					vsize = pevent->long_size;
 					break;
 				case 2:
-					ls = 8;
+					vsize = 8;
 				default:
+					vsize = ls; /* ? */
 					break;
 				}
-				val = pevent_read_number(pevent, bptr, ls);
-				bptr += ls;
+			/* fall through */
+			case '*':
+				if (*ptr == '*')
+					vsize = 4;
+
+				/* the pointers are always 4 bytes aligned */
+				bptr = (void *)(((unsigned long)bptr + 3) &
+						~3);
+				val = pevent_read_number(pevent, bptr, vsize);
+				bptr += vsize;
 				arg = alloc_arg();
 				arg->next = NULL;
 				arg->type = PRINT_ATOM;
@@ -3550,6 +3559,13 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 				sprintf(arg->atom.atom, "%lld", val);
 				*next = arg;
 				next = &arg->next;
+				/*
+				 * The '*' case means that an arg is used as the length.
+				 * We need to continue to figure out for what.
+				 */
+				if (*ptr == '*')
+					goto process_again;
+
 				break;
 			case 's':
 				arg = alloc_arg();
-- 
cgit v1.2.3


From 0866a97eb7562409ba792f5e904841dd8e23c8b5 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 22 May 2012 14:52:40 +0900
Subject: tools lib traceevent: Add support to show migrate disable counter

The RT kernel added a migrate disable counter in all events. Add
support to show this in the latency format.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-l6ulxyda952g7kua4pfsh73k@git.kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 57 +++++++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 19 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 0644c2a23ad6..872dd35db051 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -2884,7 +2884,7 @@ static int get_common_info(struct pevent *pevent,
 	event = pevent->events[0];
 	field = pevent_find_common_field(event, type);
 	if (!field)
-		die("field '%s' not found", type);
+		return -1;
 
 	*offset = field->offset;
 	*size = field->size;
@@ -2935,15 +2935,16 @@ static int parse_common_flags(struct pevent *pevent, void *data)
 
 static int parse_common_lock_depth(struct pevent *pevent, void *data)
 {
-	int ret;
-
-	ret = __parse_common(pevent, data,
-			     &pevent->ld_size, &pevent->ld_offset,
-			     "common_lock_depth");
-	if (ret < 0)
-		return -1;
+	return __parse_common(pevent, data,
+			      &pevent->ld_size, &pevent->ld_offset,
+			      "common_lock_depth");
+}
 
-	return ret;
+static int parse_common_migrate_disable(struct pevent *pevent, void *data)
+{
+	return __parse_common(pevent, data,
+			      &pevent->ld_size, &pevent->ld_offset,
+			      "common_migrate_disable");
 }
 
 static int events_id_cmp(const void *a, const void *b);
@@ -3988,10 +3989,13 @@ void pevent_data_lat_fmt(struct pevent *pevent,
 			 struct trace_seq *s, struct pevent_record *record)
 {
 	static int check_lock_depth = 1;
+	static int check_migrate_disable = 1;
 	static int lock_depth_exists;
+	static int migrate_disable_exists;
 	unsigned int lat_flags;
 	unsigned int pc;
 	int lock_depth;
+	int migrate_disable;
 	int hardirq;
 	int softirq;
 	void *data = record->data;
@@ -3999,18 +4003,26 @@ void pevent_data_lat_fmt(struct pevent *pevent,
 	lat_flags = parse_common_flags(pevent, data);
 	pc = parse_common_pc(pevent, data);
 	/* lock_depth may not always exist */
-	if (check_lock_depth) {
-		struct format_field *field;
-		struct event_format *event;
-
-		check_lock_depth = 0;
-		event = pevent->events[0];
-		field = pevent_find_common_field(event, "common_lock_depth");
-		if (field)
-			lock_depth_exists = 1;
-	}
 	if (lock_depth_exists)
 		lock_depth = parse_common_lock_depth(pevent, data);
+	else if (check_lock_depth) {
+		lock_depth = parse_common_lock_depth(pevent, data);
+		if (lock_depth < 0)
+			check_lock_depth = 0;
+		else
+			lock_depth_exists = 1;
+	}
+
+	/* migrate_disable may not always exist */
+	if (migrate_disable_exists)
+		migrate_disable = parse_common_migrate_disable(pevent, data);
+	else if (check_migrate_disable) {
+		migrate_disable = parse_common_migrate_disable(pevent, data);
+		if (migrate_disable < 0)
+			check_migrate_disable = 0;
+		else
+			migrate_disable_exists = 1;
+	}
 
 	hardirq = lat_flags & TRACE_FLAG_HARDIRQ;
 	softirq = lat_flags & TRACE_FLAG_SOFTIRQ;
@@ -4029,6 +4041,13 @@ void pevent_data_lat_fmt(struct pevent *pevent,
 	else
 		trace_seq_putc(s, '.');
 
+	if (migrate_disable_exists) {
+		if (migrate_disable < 0)
+			trace_seq_putc(s, '.');
+		else
+			trace_seq_printf(s, "%d", migrate_disable);
+	}
+
 	if (lock_depth_exists) {
 		if (lock_depth < 0)
 			trace_seq_putc(s, '.');
-- 
cgit v1.2.3


From aaf05c725bed1c7a8c940d9215662c78bea05dfd Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 9 Jan 2012 15:58:09 -0500
Subject: tools lib traceevent: Fix %pM print format arg handling

When %pM is used, the arg value must be a 6 byte character that will
be printed as a 6 byte MAC address. But the code does a break over the
main code which updates the current processing arg to point to the
next arg. If there are other print arguments after a %pM, they will be
off by one. The next arg will still be processing the %pM arg.

Reported-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-q3g0n1espikynsdkpbi6ue6t@git.kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 872dd35db051..da06c33dcf41 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3858,6 +3858,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				} else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') {
 					print_mac_arg(s, *(ptr+1), data, size, event, arg);
 					ptr++;
+					arg = arg->next;
 					break;
 				}
 
-- 
cgit v1.2.3


From c5b35b731965d16fa8c966e288489857097e0b25 Mon Sep 17 00:00:00 2001
From: Wolfgang Mauerer <wolfgang.mauerer@siemens.com>
Date: Thu, 22 Mar 2012 11:18:21 +0100
Subject: tools lib traceevent: Fix trace_printk for long integers

On 32 bit systems, a conversion of the trace_printk format string
"%lu" -> "%llu" is intended (similar for %lx etc.) when a trace was
taken on a machine with 64 bit long integers. However, the current
code computes the bogus transformation "%lu" -> "%u".  Fix this.

Besides that, the transformation is only required on systems that don't
use 64 bits for long integers natively.

Signed-off-by: Wolfgang Mauerer <wolfgang.mauerer@siemens.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1332411501-8059-3-git-send-email-wolfgang.mauerer@siemens.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index da06c33dcf41..ddee5a8cf135 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3895,14 +3895,15 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 						break;
 					}
 				}
-				if (pevent->long_size == 8 && ls) {
+				if (pevent->long_size == 8 && ls &&
+				    sizeof(long) != 8) {
 					char *p;
 
 					ls = 2;
 					/* make %l into %ll */
 					p = strchr(format, 'l');
 					if (p)
-						memmove(p, p+1, strlen(p)+1);
+						memmove(p+1, p, strlen(p)+1);
 					else if (strcmp(format, "%p") == 0)
 						strcpy(format, "0x%llx");
 				}
-- 
cgit v1.2.3


From 0fc45ef5202a34b5862ca246740e6ab50bc3e3e1 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 9 Apr 2012 11:54:29 +0900
Subject: tools lib traceevent: Fix printk_cmp()

The printk_cmp function should use printk_map instead of func_map.
Also rename the variables for consistency.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1333940074-19052-3-git-send-email-namhyung.kim@lge.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index ddee5a8cf135..7815b8d2eabd 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -511,12 +511,12 @@ struct printk_list {
 
 static int printk_cmp(const void *a, const void *b)
 {
-	const struct func_map *fa = a;
-	const struct func_map *fb = b;
+	const struct printk_map *pa = a;
+	const struct printk_map *pb = b;
 
-	if (fa->addr < fb->addr)
+	if (pa->addr < pb->addr)
 		return -1;
-	if (fa->addr > fb->addr)
+	if (pa->addr > pb->addr)
 		return 1;
 
 	return 0;
-- 
cgit v1.2.3


From deba3fb26fd1ed3235b00dccced9784a7f76ec3c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 9 Apr 2012 11:54:30 +0900
Subject: tools lib traceevent: Introduce extend_token()

The __read_token() function has some duplicated code to handle
internal buffer overflow. Factor them out to new extend_token().

According to the man pages of realloc/free(3), they can handle NULL
pointer input so that it can be ended up to compact the code.  Also
handle error path correctly.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1333940074-19052-4-git-send-email-namhyung.kim@lge.com
[rostedt@goodmis.org: added some extra whitespace]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 50 ++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 7815b8d2eabd..768fab5fbcb7 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -781,6 +781,25 @@ int pevent_peek_char(void)
 	return __peek_char();
 }
 
+static int extend_token(char **tok, char *buf, int size)
+{
+	char *newtok = realloc(*tok, size);
+
+	if (!newtok) {
+		free(*tok);
+		*tok = NULL;
+		return -1;
+	}
+
+	if (!*tok)
+		strcpy(newtok, buf);
+	else
+		strcat(newtok, buf);
+	*tok = newtok;
+
+	return 0;
+}
+
 static enum event_type force_token(const char *str, char **tok);
 
 static enum event_type __read_token(char **tok)
@@ -865,17 +884,10 @@ static enum event_type __read_token(char **tok)
 		do {
 			if (i == (BUFSIZ - 1)) {
 				buf[i] = 0;
-				if (*tok) {
-					*tok = realloc(*tok, tok_size + BUFSIZ);
-					if (!*tok)
-						return EVENT_NONE;
-					strcat(*tok, buf);
-				} else
-					*tok = strdup(buf);
+				tok_size += BUFSIZ;
 
-				if (!*tok)
+				if (extend_token(tok, buf, tok_size) < 0)
 					return EVENT_NONE;
-				tok_size += BUFSIZ;
 				i = 0;
 			}
 			last_ch = ch;
@@ -914,17 +926,10 @@ static enum event_type __read_token(char **tok)
 	while (get_type(__peek_char()) == type) {
 		if (i == (BUFSIZ - 1)) {
 			buf[i] = 0;
-			if (*tok) {
-				*tok = realloc(*tok, tok_size + BUFSIZ);
-				if (!*tok)
-					return EVENT_NONE;
-				strcat(*tok, buf);
-			} else
-				*tok = strdup(buf);
+			tok_size += BUFSIZ;
 
-			if (!*tok)
+			if (extend_token(tok, buf, tok_size) < 0)
 				return EVENT_NONE;
-			tok_size += BUFSIZ;
 			i = 0;
 		}
 		ch = __read_char();
@@ -933,14 +938,7 @@ static enum event_type __read_token(char **tok)
 
  out:
 	buf[i] = 0;
-	if (*tok) {
-		*tok = realloc(*tok, tok_size + i);
-		if (!*tok)
-			return EVENT_NONE;
-		strcat(*tok, buf);
-	} else
-		*tok = strdup(buf);
-	if (!*tok)
+	if (extend_token(tok, buf, tok_size + i + 1) < 0)
 		return EVENT_NONE;
 
 	if (type == EVENT_ITEM) {
-- 
cgit v1.2.3


From ca63858e9eb0ce495031c4ab5291874835cb43cf Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 9 Apr 2012 11:54:31 +0900
Subject: tools lib traceevent: Handle strdup failure cases

There were some places didn't check return value of the strdup and had
unneeded/duplicated checks.  Fix it.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1333940074-19052-5-git-send-email-namhyung.kim@lge.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 768fab5fbcb7..cd334d52d333 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -467,8 +467,10 @@ int pevent_register_function(struct pevent *pevent, char *func,
 		item->mod = NULL;
 	item->addr = addr;
 
-	pevent->funclist = item;
+	if (!item->func || (mod && !item->mod))
+		die("malloc func");
 
+	pevent->funclist = item;
 	pevent->func_count++;
 
 	return 0;
@@ -583,10 +585,13 @@ int pevent_register_print_string(struct pevent *pevent, char *fmt,
 	item = malloc_or_die(sizeof(*item));
 
 	item->next = pevent->printklist;
-	pevent->printklist = item;
 	item->printk = strdup(fmt);
 	item->addr = addr;
 
+	if (!item->printk)
+		die("malloc fmt");
+
+	pevent->printklist = item;
 	pevent->printk_count++;
 
 	return 0;
@@ -2150,6 +2155,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char **
 		if (value == NULL)
 			goto out_free;
 		field->value = strdup(value);
+		if (field->value == NULL)
+			goto out_free;
 
 		free_arg(arg);
 		arg = alloc_arg();
@@ -2163,6 +2170,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char **
 		if (value == NULL)
 			goto out_free;
 		field->str = strdup(value);
+		if (field->str == NULL)
+			goto out_free;
 		free_arg(arg);
 		arg = NULL;
 
@@ -3433,6 +3442,9 @@ process_defined_func(struct trace_seq *s, void *data, int size,
 			string = malloc_or_die(sizeof(*string));
 			string->next = strings;
 			string->str = strdup(str.buffer);
+			if (!string->str)
+				die("malloc str");
+
 			strings = string;
 			trace_seq_destroy(&str);
 			break;
@@ -3571,6 +3583,8 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 				arg->next = NULL;
 				arg->type = PRINT_BSTRING;
 				arg->string.string = strdup(bptr);
+				if (!arg->string.string)
+					break;
 				bptr += strlen(bptr) + 1;
 				*next = arg;
 				next = &arg->next;
@@ -4666,6 +4680,8 @@ int pevent_parse_event(struct pevent *pevent,
 		die("failed to read event id");
 
 	event->system = strdup(sys);
+	if (!event->system)
+		die("failed to allocate system");
 
 	/* Add pevent to event so that it can be referenced */
 	event->pevent = pevent;
@@ -4707,6 +4723,10 @@ int pevent_parse_event(struct pevent *pevent,
 			list = &arg->next;
 			arg->type = PRINT_FIELD;
 			arg->field.name = strdup(field->name);
+			if (!arg->field.name) {
+				do_warning("failed to allocate field name");
+				goto event_failed;
+			}
 			arg->field.field = field;
 		}
 		return 0;
@@ -5050,6 +5070,11 @@ int pevent_register_event_handler(struct pevent *pevent,
 	if (sys_name)
 		handle->sys_name = strdup(sys_name);
 
+	if ((event_name && !handle->event_name) ||
+	    (sys_name && !handle->sys_name)) {
+		die("Failed to allocate event/sys name");
+	}
+
 	handle->func = func;
 	handle->next = pevent->handlers;
 	pevent->handlers = handle;
-- 
cgit v1.2.3


From d286447f23cdb0337a5429e10b39761f6b1d5c18 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 9 Apr 2012 11:54:33 +0900
Subject: tools lib traceevent: Handle realloc() failure path

The realloc can fail so that we should handle it properly.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1333940074-19052-7-git-send-email-namhyung.kim@lge.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 76 ++++++++++++++++++++++++++++++--------
 1 file changed, 60 insertions(+), 16 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index cd334d52d333..05eb6b40b16a 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1264,9 +1264,15 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 					field->flags |= FIELD_IS_POINTER;
 
 				if (field->type) {
-					field->type = realloc(field->type,
-							      strlen(field->type) +
-							      strlen(last_token) + 2);
+					char *new_type;
+					new_type = realloc(field->type,
+							   strlen(field->type) +
+							   strlen(last_token) + 2);
+					if (!new_type) {
+						free(last_token);
+						goto fail;
+					}
+					field->type = new_type;
 					strcat(field->type, " ");
 					strcat(field->type, last_token);
 					free(last_token);
@@ -1291,6 +1297,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		if (strcmp(token, "[") == 0) {
 			enum event_type last_type = type;
 			char *brackets = token;
+			char *new_brackets;
 			int len;
 
 			field->flags |= FIELD_IS_ARRAY;
@@ -1310,9 +1317,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 					len = 1;
 				last_type = type;
 
-				brackets = realloc(brackets,
-						   strlen(brackets) +
-						   strlen(token) + len);
+				new_brackets = realloc(brackets,
+						       strlen(brackets) +
+						       strlen(token) + len);
+				if (!new_brackets) {
+					free(brackets);
+					goto fail;
+				}
+				brackets = new_brackets;
 				if (len == 2)
 					strcat(brackets, " ");
 				strcat(brackets, token);
@@ -1328,7 +1340,12 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 
 			free_token(token);
 
-			brackets = realloc(brackets, strlen(brackets) + 2);
+			new_brackets = realloc(brackets, strlen(brackets) + 2);
+			if (!new_brackets) {
+				free(brackets);
+				goto fail;
+			}
+			brackets = new_brackets;
 			strcat(brackets, "]");
 
 			/* add brackets to type */
@@ -1339,10 +1356,16 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 			 * the format: type [] item;
 			 */
 			if (type == EVENT_ITEM) {
-				field->type = realloc(field->type,
-						      strlen(field->type) +
-						      strlen(field->name) +
-						      strlen(brackets) + 2);
+				char *new_type;
+				new_type = realloc(field->type,
+						   strlen(field->type) +
+						   strlen(field->name) +
+						   strlen(brackets) + 2);
+				if (!new_type) {
+					free(brackets);
+					goto fail;
+				}
+				field->type = new_type;
 				strcat(field->type, " ");
 				strcat(field->type, field->name);
 				free_token(field->name);
@@ -1350,9 +1373,15 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 				field->name = token;
 				type = read_token(&token);
 			} else {
-				field->type = realloc(field->type,
-						      strlen(field->type) +
-						      strlen(brackets) + 1);
+				char *new_type;
+				new_type = realloc(field->type,
+						   strlen(field->type) +
+						   strlen(brackets) + 1);
+				if (!new_type) {
+					free(brackets);
+					goto fail;
+				}
+				field->type = new_type;
 				strcat(field->type, brackets);
 			}
 			free(brackets);
@@ -1735,10 +1764,16 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 		/* could just be a type pointer */
 		if ((strcmp(arg->op.op, "*") == 0) &&
 		    type == EVENT_DELIM && (strcmp(token, ")") == 0)) {
+			char *new_atom;
+
 			if (left->type != PRINT_ATOM)
 				die("bad pointer type");
-			left->atom.atom = realloc(left->atom.atom,
+			new_atom = realloc(left->atom.atom,
 					    strlen(left->atom.atom) + 3);
+			if (!new_atom)
+				goto out_free;
+
+			left->atom.atom = new_atom;
 			strcat(left->atom.atom, " *");
 			free(arg->op.op);
 			*arg = *left;
@@ -2597,7 +2632,16 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 		}
 		/* atoms can be more than one token long */
 		while (type == EVENT_ITEM) {
-			atom = realloc(atom, strlen(atom) + strlen(token) + 2);
+			char *new_atom;
+			new_atom = realloc(atom,
+					   strlen(atom) + strlen(token) + 2);
+			if (!new_atom) {
+				free(atom);
+				*tok = NULL;
+				free_token(token);
+				return EVENT_ERROR;
+			}
+			atom = new_atom;
 			strcat(atom, " ");
 			strcat(atom, token);
 			free_token(token);
-- 
cgit v1.2.3


From 3831a42deba59bf26618b0dd6fa1320a0a94ebd9 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 23 Apr 2012 13:58:33 +0900
Subject: tools lib traceevent: Pass string type argument to args

It seems PEVENT_FUNC_ARG_STRING missed passing the allocated string to
the args array.  Fix it.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1335157118-14658-7-git-send-email-namhyung.kim@lge.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 05eb6b40b16a..337ca02fb525 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3489,6 +3489,7 @@ process_defined_func(struct trace_seq *s, void *data, int size,
 			if (!string->str)
 				die("malloc str");
 
+			args[i] = (unsigned long long)string->str;
 			strings = string;
 			trace_seq_destroy(&str);
 			break;
-- 
cgit v1.2.3


From 4b5632bc3122ec9b7e875294c8abe92f1573196a Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 23 Apr 2012 13:58:34 +0900
Subject: tools lib traceevent: Do not call add_event() again if allocation
 failed

When memory allocation for the field name is failed, do not goto
event_failed since we added the event already.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1335157118-14658-8-git-send-email-namhyung.kim@lge.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 337ca02fb525..99b0cd4b79d4 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -4770,7 +4770,8 @@ int pevent_parse_event(struct pevent *pevent,
 			arg->field.name = strdup(field->name);
 			if (!arg->field.name) {
 				do_warning("failed to allocate field name");
-				goto event_failed;
+				event->flags |= EVENT_FL_FAILED;
+				return -1;
 			}
 			arg->field.field = field;
 		}
-- 
cgit v1.2.3


From 16e6b8fdfd181ac79f04ff826c42c7d8a2806a17 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 23 Apr 2012 13:58:35 +0900
Subject: tools lib traceevent: Fix some comments

Update and add missing argument descriptions and fix some typo on
function comments.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1335157118-14658-9-git-send-email-namhyung.kim@lge.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 99b0cd4b79d4..863016040dd6 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -4037,8 +4037,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
  * pevent_data_lat_fmt - parse the data for the latency format
  * @pevent: a handle to the pevent
  * @s: the trace_seq to write to
- * @data: the raw data to read from
- * @size: currently unused.
+ * @record: the record to read from
  *
  * This parses out the Latency format (interrupts disabled,
  * need rescheduling, in hard/soft interrupt, preempt count
@@ -4173,10 +4172,7 @@ const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid)
  * pevent_data_comm_from_pid - parse the data into the print format
  * @s: the trace_seq to write to
  * @event: the handle to the event
- * @cpu: the cpu the event was recorded on
- * @data: the raw data
- * @size: the size of the raw data
- * @nsecs: the timestamp of the event
+ * @record: the record to read from
  *
  * This parses the raw @data using the given @event information and
  * writes the print format into the trace_seq.
@@ -4944,7 +4940,7 @@ int pevent_get_any_field_val(struct trace_seq *s, struct event_format *event,
  * @record: The record with the field name.
  * @err: print default error if failed.
  *
- * Returns: 0 on success, -1 field not fould, or 1 if buffer is full.
+ * Returns: 0 on success, -1 field not found, or 1 if buffer is full.
  */
 int pevent_print_num_field(struct trace_seq *s, const char *fmt,
 			   struct event_format *event, const char *name,
@@ -4986,11 +4982,12 @@ static void free_func_handle(struct pevent_function_handler *func)
  * pevent_register_print_function - register a helper function
  * @pevent: the handle to the pevent
  * @func: the function to process the helper function
+ * @ret_type: the return type of the helper function
  * @name: the name of the helper function
  * @parameters: A list of enum pevent_func_arg_type
  *
  * Some events may have helper functions in the print format arguments.
- * This allows a plugin to dynmically create a way to process one
+ * This allows a plugin to dynamically create a way to process one
  * of these functions.
  *
  * The @parameters is a variable list of pevent_func_arg_type enums that
@@ -5061,12 +5058,13 @@ int pevent_register_print_function(struct pevent *pevent,
 }
 
 /**
- * pevent_register_event_handle - register a way to parse an event
+ * pevent_register_event_handler - register a way to parse an event
  * @pevent: the handle to the pevent
  * @id: the id of the event to register
  * @sys_name: the system name the event belongs to
  * @event_name: the name of the event
  * @func: the function to call to parse the event information
+ * @context: the data to be passed to @func
  *
  * This function allows a developer to override the parsing of
  * a given event. If for some reason the default print format
-- 
cgit v1.2.3


From e54b34aed1c4082ed03f4d1f7a19276059b1e30a Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 23 Apr 2012 13:58:36 +0900
Subject: tools lib traceevent: Check result of malloc() during reading token

The malloc can fail so the return value should be checked.  For now,
just use malloc_or_die().

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1335157118-14658-10-git-send-email-namhyung.kim@lge.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/parse-filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 80d872a81f26..d54c2b4dbd9f 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -96,7 +96,7 @@ static enum event_type read_token(char **tok)
 	    (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) &&
 	    pevent_peek_char() == '~') {
 		/* append it */
-		*tok = malloc(3);
+		*tok = malloc_or_die(3);
 		sprintf(*tok, "%c%c", *token, '~');
 		free_token(token);
 		/* Now remove the '~' from the buffer */
-- 
cgit v1.2.3


From 0fed48341529716c38493be66591bda458921b75 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Mon, 23 Apr 2012 13:58:38 +0900
Subject: tools lib traceevent: Check return value of arg_to_str()

The arg_to_str() can fail so we should handle that case properly.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1335157118-14658-12-git-send-email-namhyung.kim@lge.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/parse-filter.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index d54c2b4dbd9f..f020ac61f9c1 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -2026,11 +2026,13 @@ static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg)
 	char *lstr;
 	char *rstr;
 	char *op;
-	char *str;
+	char *str = NULL;
 	int len;
 
 	lstr = arg_to_str(filter, arg->exp.left);
 	rstr = arg_to_str(filter, arg->exp.right);
+	if (!lstr || !rstr)
+		goto out;
 
 	switch (arg->exp.type) {
 	case FILTER_EXP_ADD:
@@ -2070,6 +2072,7 @@ static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg)
 	len = strlen(op) + strlen(lstr) + strlen(rstr) + 4;
 	str = malloc_or_die(len);
 	snprintf(str, len, "%s %s %s", lstr, op, rstr);
+out:
 	free(lstr);
 	free(rstr);
 
@@ -2086,6 +2089,8 @@ static char *num_to_str(struct event_filter *filter, struct filter_arg *arg)
 
 	lstr = arg_to_str(filter, arg->num.left);
 	rstr = arg_to_str(filter, arg->num.right);
+	if (!lstr || !rstr)
+		goto out;
 
 	switch (arg->num.type) {
 	case FILTER_CMP_EQ:
@@ -2122,6 +2127,7 @@ static char *num_to_str(struct event_filter *filter, struct filter_arg *arg)
 		break;
 	}
 
+out:
 	free(lstr);
 	free(rstr);
 	return str;
@@ -2272,7 +2278,12 @@ int pevent_filter_compare(struct event_filter *filter1, struct event_filter *fil
 		/* The best way to compare complex filters is with strings */
 		str1 = arg_to_str(filter1, filter_type1->filter);
 		str2 = arg_to_str(filter2, filter_type2->filter);
-		result = strcmp(str1, str2) != 0;
+		if (str1 && str2)
+			result = strcmp(str1, str2) != 0;
+		else
+			/* bail out if allocation fails */
+			result = 1;
+
 		free(str1);
 		free(str2);
 		if (result)
-- 
cgit v1.2.3


From c9bbabe32a231b5caa8c42fa6783405346983c59 Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Tue, 24 Apr 2012 23:19:40 +0200
Subject: tools lib traceevent: Add missing break in make_bprint_args

In the current code we assign vsize=8 and then fall through to the
default and assign vsize=1. -> probably the break is missing here,
otherwise we can remove the case.

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-3fxjy46h2tr9pl0spv7tems6@git.kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 863016040dd6..f880db457842 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3594,6 +3594,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 					break;
 				case 2:
 					vsize = 8;
+					break;
 				default:
 					vsize = ls; /* ? */
 					break;
-- 
cgit v1.2.3


From f6ced60fb6c0ee115982157457c47e48802d6e1d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 24 Apr 2012 10:29:44 +0900
Subject: tools lib traceevent: Cleanup realloc use

The if branch is completely unnecessary since 'realloc' can handle
NULL pointers for the first parameter.

This patch is just an adoption of Ulrich Drepper's recent patch on
perf tools.

Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1335230984-7613-1-git-send-email-namhyung.kim@lge.com
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/traceevent/event-parse.c  |  8 ++------
 tools/lib/traceevent/parse-filter.c | 24 ++++++++----------------
 2 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index f880db457842..5f34aa371b56 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -633,12 +633,8 @@ static void add_event(struct pevent *pevent, struct event_format *event)
 {
 	int i;
 
-	if (!pevent->events)
-		pevent->events = malloc_or_die(sizeof(event));
-	else
-		pevent->events =
-			realloc(pevent->events, sizeof(event) *
-				(pevent->nr_events + 1));
+	pevent->events = realloc(pevent->events, sizeof(event) *
+				 (pevent->nr_events + 1));
 	if (!pevent->events)
 		die("Can not allocate events");
 
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index f020ac61f9c1..ad17855528f9 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -148,17 +148,11 @@ add_filter_type(struct event_filter *filter, int id)
 	if (filter_type)
 		return filter_type;
 
-	if (!filter->filters)
-		filter->event_filters =
-			malloc_or_die(sizeof(*filter->event_filters));
-	else {
-		filter->event_filters =
-			realloc(filter->event_filters,
-				sizeof(*filter->event_filters) *
-				(filter->filters + 1));
-		if (!filter->event_filters)
-			die("Could not allocate filter");
-	}
+	filter->event_filters =	realloc(filter->event_filters,
+					sizeof(*filter->event_filters) *
+					(filter->filters + 1));
+	if (!filter->event_filters)
+		die("Could not allocate filter");
 
 	for (i = 0; i < filter->filters; i++) {
 		if (filter->event_filters[i].event_id > id)
@@ -1480,7 +1474,7 @@ void pevent_filter_clear_trivial(struct event_filter *filter,
 {
 	struct filter_type *filter_type;
 	int count = 0;
-	int *ids;
+	int *ids = NULL;
 	int i;
 
 	if (!filter->filters)
@@ -1504,10 +1498,8 @@ void pevent_filter_clear_trivial(struct event_filter *filter,
 		default:
 			break;
 		}
-		if (count)
-			ids = realloc(ids, sizeof(*ids) * (count + 1));
-		else
-			ids = malloc(sizeof(*ids));
+
+		ids = realloc(ids, sizeof(*ids) * (count + 1));
 		if (!ids)
 			die("Can't allocate ids");
 		ids[count++] = filter_type->event_id;
-- 
cgit v1.2.3


From 0c47935c5b5cd4916cf1c1ed4a2894807f7bcc3e Mon Sep 17 00:00:00 2001
From: Daniel Nicoletti <dantti12@gmail.com>
Date: Wed, 4 Jul 2012 10:20:31 -0300
Subject: HID: add battery quirk for Apple Wireless ANSI

Add USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI, to the quirk list since it report
wrong feature type and wrong percentage range.

Signed-off-by: Daniel Nicoletti <dantti12@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 132b0019365e..5301006f6c15 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -301,6 +301,9 @@ static const struct hid_device_id hid_battery_quirks[] = {
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
 			       USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI),
 	  HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE },
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
+		USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI),
+	  HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE },
 	{}
 };
 
-- 
cgit v1.2.3


From ebf124ffab59e9e1c6179347fe95fe5baf32dcd7 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 4 Jul 2012 00:00:47 +0200
Subject: perf test: Use ARRAY_SIZE in parse events tests

Use ARRAY_SIZE instead of defining the sizes separately for each test
arrays.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1341352848-11833-10-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events-test.c | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c
index dd0c306a0698..1b997d2b89ce 100644
--- a/tools/perf/util/parse-events-test.c
+++ b/tools/perf/util/parse-events-test.c
@@ -634,8 +634,6 @@ static struct test__event_st test__events[] = {
 	},
 };
 
-#define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st))
-
 static struct test__event_st test__events_pmu[] = {
 	[0] = {
 		.name  = "cpu/config=10,config1,config2=3,period=1000/u",
@@ -647,9 +645,6 @@ static struct test__event_st test__events_pmu[] = {
 	},
 };
 
-#define TEST__EVENTS_PMU_CNT (sizeof(test__events_pmu) / \
-			      sizeof(struct test__event_st))
-
 struct test__term {
 	const char *str;
 	__u32 type;
@@ -765,21 +760,17 @@ int parse_events__test(void)
 {
 	int ret;
 
-	do {
-		ret = test_events(test__events, TEST__EVENTS_CNT);
-		if (ret)
-			break;
-
-		if (test_pmu()) {
-			ret = test_events(test__events_pmu,
-					  TEST__EVENTS_PMU_CNT);
-			if (ret)
-				break;
-		}
+#define TEST_EVENTS(tests)				\
+do {							\
+	ret = test_events(tests, ARRAY_SIZE(tests));	\
+	if (ret)					\
+		return ret;				\
+} while (0)
 
-		ret = test_terms(test__terms, TEST__TERMS_CNT);
+	TEST_EVENTS(test__events);
 
-	} while (0);
+	if (test_pmu())
+		TEST_EVENTS(test__events_pmu);
 
-	return ret;
+	return test_terms(test__terms, ARRAY_SIZE(test__terms));
 }
-- 
cgit v1.2.3


From 8c5f0a84c6b16e04195001bfd78281909519cf09 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 4 Jul 2012 00:00:39 +0200
Subject: perf tools: Add empty rule for new line in event syntax parsing

The flex generator prints out each input character that is ignored by
lex rules.

Since the alias processing, we can have '\n' characters on input. We
need to assign empty rule to it, so it's not printed out.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1341352848-11833-2-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.l | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index a06689474210..6cbe092e6c3b 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -152,6 +152,7 @@ r{num_raw_hex}		{ return raw(yyscanner); }
 ,			{ return ','; }
 :			{ return ':'; }
 =			{ return '='; }
+\n			{ }
 
 <mem>{
 {modifier_bp}		{ return str(yyscanner, PE_MODIFIER_BP); }
-- 
cgit v1.2.3


From cf3506dcc4a855d11af20bcb271ac921033ba0de Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 4 Jul 2012 00:00:43 +0200
Subject: perf tools: Split out PE_VALUE_SYM parsing token to SW and HW tokens

Spliting PE_VALUE_SYM token to PE_VALUE_SYM_HW and PE_VALUE_SYM_SW
tokens to separate hardware and software symbols.

This will be useful in upcomming patch where we want to be able to parse
out only hardware events.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1341352848-11833-6-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.l |  2 +-
 tools/perf/util/parse-events.y | 15 +++++++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 6cbe092e6c3b..384ca74c6b22 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -56,7 +56,7 @@ static int sym(yyscan_t scanner, int type, int config)
 	YYSTYPE *yylval = parse_events_get_lval(scanner);
 
 	yylval->num = (type << 16) + config;
-	return PE_VALUE_SYM;
+	return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW;
 }
 
 static int term(yyscan_t scanner, int type)
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 9525c455d27f..2bc5fbff2b5d 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -26,14 +26,15 @@ do { \
 %}
 
 %token PE_START_EVENTS PE_START_TERMS
-%token PE_VALUE PE_VALUE_SYM PE_RAW PE_TERM
+%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM
 %token PE_NAME
 %token PE_MODIFIER_EVENT PE_MODIFIER_BP
 %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
 %token PE_PREFIX_MEM PE_PREFIX_RAW
 %token PE_ERROR
 %type <num> PE_VALUE
-%type <num> PE_VALUE_SYM
+%type <num> PE_VALUE_SYM_HW
+%type <num> PE_VALUE_SYM_SW
 %type <num> PE_RAW
 %type <num> PE_TERM
 %type <str> PE_NAME
@@ -41,6 +42,7 @@ do { \
 %type <str> PE_NAME_CACHE_OP_RESULT
 %type <str> PE_MODIFIER_EVENT
 %type <str> PE_MODIFIER_BP
+%type <num> value_sym
 %type <head> event_config
 %type <term> event_term
 %type <head> event_pmu
@@ -109,8 +111,13 @@ PE_NAME '/' event_config '/'
 	$$ = list;
 }
 
+value_sym:
+PE_VALUE_SYM_HW
+|
+PE_VALUE_SYM_SW
+
 event_legacy_symbol:
-PE_VALUE_SYM '/' event_config '/'
+value_sym '/' event_config '/'
 {
 	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
@@ -123,7 +130,7 @@ PE_VALUE_SYM '/' event_config '/'
 	$$ = list;
 }
 |
-PE_VALUE_SYM sep_slash_dc
+value_sym sep_slash_dc
 {
 	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
-- 
cgit v1.2.3


From 1dc12760854a670d0366dcfd8ad179e3574c1712 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 4 Jul 2012 00:00:44 +0200
Subject: perf tools: Split event symbols arrays to hw and sw parts

It'll be convenient in upcoming patch to access hw event symbols
strings via enum perf_hw_id indexes. In order not to duplicate
the data, creating two separate arrays:

  event_symbols_hw for enum perf_hw_id events
  event_symbols_sw for enum perf_sw_ids events

Changing the current event list code to follow the change.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1341352848-11833-7-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 174 +++++++++++++++++++++++++++--------------
 1 file changed, 117 insertions(+), 57 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 1dc44dc69133..1aa721d7c10f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -19,8 +19,6 @@
 #define MAX_NAME_LEN 100
 
 struct event_symbol {
-	u8		type;
-	u64		config;
 	const char	*symbol;
 	const char	*alias;
 };
@@ -30,30 +28,86 @@ extern int parse_events_debug;
 #endif
 int parse_events_parse(void *data, void *scanner);
 
-#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
-#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
-
-static struct event_symbol event_symbols[] = {
-  { CHW(CPU_CYCLES),			"cpu-cycles",			"cycles"		},
-  { CHW(STALLED_CYCLES_FRONTEND),	"stalled-cycles-frontend",	"idle-cycles-frontend"	},
-  { CHW(STALLED_CYCLES_BACKEND),	"stalled-cycles-backend",	"idle-cycles-backend"	},
-  { CHW(INSTRUCTIONS),			"instructions",			""			},
-  { CHW(CACHE_REFERENCES),		"cache-references",		""			},
-  { CHW(CACHE_MISSES),			"cache-misses",			""			},
-  { CHW(BRANCH_INSTRUCTIONS),		"branch-instructions",		"branches"		},
-  { CHW(BRANCH_MISSES),			"branch-misses",		""			},
-  { CHW(BUS_CYCLES),			"bus-cycles",			""			},
-  { CHW(REF_CPU_CYCLES),		"ref-cycles",			""			},
-
-  { CSW(CPU_CLOCK),			"cpu-clock",			""			},
-  { CSW(TASK_CLOCK),			"task-clock",			""			},
-  { CSW(PAGE_FAULTS),			"page-faults",			"faults"		},
-  { CSW(PAGE_FAULTS_MIN),		"minor-faults",			""			},
-  { CSW(PAGE_FAULTS_MAJ),		"major-faults",			""			},
-  { CSW(CONTEXT_SWITCHES),		"context-switches",		"cs"			},
-  { CSW(CPU_MIGRATIONS),		"cpu-migrations",		"migrations"		},
-  { CSW(ALIGNMENT_FAULTS),		"alignment-faults",		""			},
-  { CSW(EMULATION_FAULTS),		"emulation-faults",		""			},
+static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = {
+		.symbol = "cpu-cycles",
+		.alias  = "cycles",
+	},
+	[PERF_COUNT_HW_INSTRUCTIONS] = {
+		.symbol = "instructions",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_CACHE_REFERENCES] = {
+		.symbol = "cache-references",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_CACHE_MISSES] = {
+		.symbol = "cache-misses",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {
+		.symbol = "branch-instructions",
+		.alias  = "branches",
+	},
+	[PERF_COUNT_HW_BRANCH_MISSES] = {
+		.symbol = "branch-misses",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_BUS_CYCLES] = {
+		.symbol = "bus-cycles",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {
+		.symbol = "stalled-cycles-frontend",
+		.alias  = "idle-cycles-frontend",
+	},
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {
+		.symbol = "stalled-cycles-backend",
+		.alias  = "idle-cycles-backend",
+	},
+	[PERF_COUNT_HW_REF_CPU_CYCLES] = {
+		.symbol = "ref-cycles",
+		.alias  = "",
+	},
+};
+
+static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
+	[PERF_COUNT_SW_CPU_CLOCK] = {
+		.symbol = "cpu-clock",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_TASK_CLOCK] = {
+		.symbol = "task-clock",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_PAGE_FAULTS] = {
+		.symbol = "page-faults",
+		.alias  = "faults",
+	},
+	[PERF_COUNT_SW_CONTEXT_SWITCHES] = {
+		.symbol = "context-switches",
+		.alias  = "cs",
+	},
+	[PERF_COUNT_SW_CPU_MIGRATIONS] = {
+		.symbol = "cpu-migrations",
+		.alias  = "migrations",
+	},
+	[PERF_COUNT_SW_PAGE_FAULTS_MIN] = {
+		.symbol = "minor-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_PAGE_FAULTS_MAJ] = {
+		.symbol = "major-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_ALIGNMENT_FAULTS] = {
+		.symbol = "alignment-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_EMULATION_FAULTS] = {
+		.symbol = "emulation-faults",
+		.alias  = "",
+	},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
@@ -824,16 +878,13 @@ int is_valid_tracepoint(const char *event_string)
 	return 0;
 }
 
-void print_events_type(u8 type)
+static void __print_events_type(u8 type, struct event_symbol *syms,
+				unsigned max)
 {
-	struct event_symbol *syms = event_symbols;
-	unsigned int i;
 	char name[64];
+	unsigned i;
 
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
-		if (type != syms->type)
-			continue;
-
+	for (i = 0; i < max ; i++, syms++) {
 		if (strlen(syms->alias))
 			snprintf(name, sizeof(name),  "%s OR %s",
 				 syms->symbol, syms->alias);
@@ -845,6 +896,14 @@ void print_events_type(u8 type)
 	}
 }
 
+void print_events_type(u8 type)
+{
+	if (type == PERF_TYPE_SOFTWARE)
+		__print_events_type(type, event_symbols_sw, PERF_COUNT_SW_MAX);
+	else
+		__print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX);
+}
+
 int print_hwcache_events(const char *event_glob)
 {
 	unsigned int type, op, i, printed = 0;
@@ -872,26 +931,13 @@ int print_hwcache_events(const char *event_glob)
 	return printed;
 }
 
-/*
- * Print the help text for the event symbols:
- */
-void print_events(const char *event_glob)
+static void print_symbol_events(const char *event_glob, unsigned type,
+				struct event_symbol *syms, unsigned max)
 {
-	unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0;
-	struct event_symbol *syms = event_symbols;
+	unsigned i, printed = 0;
 	char name[MAX_NAME_LEN];
 
-	printf("\n");
-	printf("List of pre-defined events (to be used in -e):\n");
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
-		type = syms->type;
-
-		if (type != prev_type && printed) {
-			printf("\n");
-			printed = 0;
-			ntypes_printed++;
-		}
+	for (i = 0; i < max; i++, syms++) {
 
 		if (event_glob != NULL && 
 		    !(strglobmatch(syms->symbol, event_glob) ||
@@ -902,17 +948,31 @@ void print_events(const char *event_glob)
 			snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
 		else
 			strncpy(name, syms->symbol, MAX_NAME_LEN);
-		printf("  %-50s [%s]\n", name,
-			event_type_descriptors[type]);
 
-		prev_type = type;
-		++printed;
+		printf("  %-50s [%s]\n", name, event_type_descriptors[type]);
+
+		printed++;
 	}
 
-	if (ntypes_printed) {
-		printed = 0;
+	if (printed)
 		printf("\n");
-	}
+}
+
+/*
+ * Print the help text for the event symbols:
+ */
+void print_events(const char *event_glob)
+{
+
+	printf("\n");
+	printf("List of pre-defined events (to be used in -e):\n");
+
+	print_symbol_events(event_glob, PERF_TYPE_HARDWARE,
+			    event_symbols_hw, PERF_COUNT_HW_MAX);
+
+	print_symbol_events(event_glob, PERF_TYPE_SOFTWARE,
+			    event_symbols_sw, PERF_COUNT_SW_MAX);
+
 	print_hwcache_events(event_glob);
 
 	if (event_glob != NULL)
-- 
cgit v1.2.3


From ce5c1fe9a9e059b5c58f0a7e2a3e687d0efac815 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 20 Jun 2012 11:11:38 +0200
Subject: perf/x86: Fix USER/KERNEL tagging of samples

Several perf interrupt handlers (PEBS,IBS,BTS) re-write regs->ip but
do not update the segment registers. So use an regs->ip based test
instead of an regs->cs/regs->flags based test.

Reported-and-tested-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-xxrt0a1zronm1sm36obwc2vy@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c4706cf9c011..6ef9d41b87f9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1863,7 +1863,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 		else
 			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
 	} else {
-		if (user_mode(regs))
+		if (!kernel_ip(regs->ip))
 			misc |= PERF_RECORD_MISC_USER;
 		else
 			misc |= PERF_RECORD_MISC_KERNEL;
-- 
cgit v1.2.3


From 15c7ad51ad58cbd3b46112c1840bc7228bd354bf Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 20 Jun 2012 20:46:33 +0200
Subject: perf/x86: Rename Intel specific macros

There are macros that are Intel specific and not x86 generic. Rename
them into INTEL_*.

This patch removes X86_PMC_IDX_GENERIC and does:

 $ sed -i -e 's/X86_PMC_MAX_/INTEL_PMC_MAX_/g'           \
         arch/x86/include/asm/kvm_host.h                 \
         arch/x86/include/asm/perf_event.h               \
         arch/x86/kernel/cpu/perf_event.c                \
         arch/x86/kernel/cpu/perf_event_p4.c             \
         arch/x86/kvm/pmu.c
 $ sed -i -e 's/X86_PMC_IDX_FIXED/INTEL_PMC_IDX_FIXED/g' \
         arch/x86/include/asm/perf_event.h               \
         arch/x86/kernel/cpu/perf_event.c                \
         arch/x86/kernel/cpu/perf_event_intel.c          \
         arch/x86/kernel/cpu/perf_event_intel_ds.c       \
         arch/x86/kvm/pmu.c
 $ sed -i -e 's/X86_PMC_MSK_/INTEL_PMC_MSK_/g'           \
         arch/x86/include/asm/perf_event.h               \
         arch/x86/kernel/cpu/perf_event.c

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340217996-2254-2-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/kvm_host.h           |  4 ++--
 arch/x86/include/asm/perf_event.h         | 17 +++++++-------
 arch/x86/kernel/cpu/perf_event.c          | 38 +++++++++++++++----------------
 arch/x86/kernel/cpu/perf_event_intel.c    | 14 ++++++------
 arch/x86/kernel/cpu/perf_event_intel_ds.c |  4 ++--
 arch/x86/kernel/cpu/perf_event_p4.c       |  2 +-
 arch/x86/kvm/pmu.c                        | 22 +++++++++---------
 7 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db7c1f2709a2..2da88c0cda14 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -313,8 +313,8 @@ struct kvm_pmu {
 	u64 counter_bitmask[2];
 	u64 global_ctrl_mask;
 	u8 version;
-	struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC];
-	struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED];
+	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
+	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
 	struct irq_work irq_work;
 	u64 reprogram_pmi;
 };
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 588f52ea810e..3b31248caf60 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -5,11 +5,10 @@
  * Performance event hw details:
  */
 
-#define X86_PMC_MAX_GENERIC				       32
-#define X86_PMC_MAX_FIXED					3
+#define INTEL_PMC_MAX_GENERIC				       32
+#define INTEL_PMC_MAX_FIXED					3
+#define INTEL_PMC_IDX_FIXED				       32
 
-#define X86_PMC_IDX_GENERIC				        0
-#define X86_PMC_IDX_FIXED				       32
 #define X86_PMC_IDX_MAX					       64
 
 #define MSR_ARCH_PERFMON_PERFCTR0			      0xc1
@@ -121,16 +120,16 @@ struct x86_pmu_capability {
 
 /* Instr_Retired.Any: */
 #define MSR_ARCH_PERFMON_FIXED_CTR0	0x309
-#define X86_PMC_IDX_FIXED_INSTRUCTIONS	(X86_PMC_IDX_FIXED + 0)
+#define INTEL_PMC_IDX_FIXED_INSTRUCTIONS	(INTEL_PMC_IDX_FIXED + 0)
 
 /* CPU_CLK_Unhalted.Core: */
 #define MSR_ARCH_PERFMON_FIXED_CTR1	0x30a
-#define X86_PMC_IDX_FIXED_CPU_CYCLES	(X86_PMC_IDX_FIXED + 1)
+#define INTEL_PMC_IDX_FIXED_CPU_CYCLES	(INTEL_PMC_IDX_FIXED + 1)
 
 /* CPU_CLK_Unhalted.Ref: */
 #define MSR_ARCH_PERFMON_FIXED_CTR2	0x30b
-#define X86_PMC_IDX_FIXED_REF_CYCLES	(X86_PMC_IDX_FIXED + 2)
-#define X86_PMC_MSK_FIXED_REF_CYCLES	(1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
+#define INTEL_PMC_IDX_FIXED_REF_CYCLES	(INTEL_PMC_IDX_FIXED + 2)
+#define INTEL_PMC_MSK_FIXED_REF_CYCLES	(1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
 
 /*
  * We model BTS tracing as another fixed-mode PMC.
@@ -139,7 +138,7 @@ struct x86_pmu_capability {
  * values are used by actual fixed events and higher values are used
  * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
  */
-#define X86_PMC_IDX_FIXED_BTS				(X86_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_IDX_FIXED_BTS				(INTEL_PMC_IDX_FIXED + 16)
 
 /*
  * IBS cpuid feature detection
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e677d9923f4f..668050002602 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -63,7 +63,7 @@ u64 x86_perf_event_update(struct perf_event *event)
 	int idx = hwc->idx;
 	s64 delta;
 
-	if (idx == X86_PMC_IDX_FIXED_BTS)
+	if (idx == INTEL_PMC_IDX_FIXED_BTS)
 		return 0;
 
 	/*
@@ -626,8 +626,8 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	c = sched->constraints[sched->state.event];
 
 	/* Prefer fixed purpose counters */
-	if (c->idxmsk64 & (~0ULL << X86_PMC_IDX_FIXED)) {
-		idx = X86_PMC_IDX_FIXED;
+	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
+		idx = INTEL_PMC_IDX_FIXED;
 		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
 			if (!__test_and_set_bit(idx, sched->state.used))
 				goto done;
@@ -635,7 +635,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	}
 	/* Grab the first unused counter starting with idx */
 	idx = sched->state.counter;
-	for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
+	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
 		if (!__test_and_set_bit(idx, sched->state.used))
 			goto done;
 	}
@@ -813,13 +813,13 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	hwc->last_cpu = smp_processor_id();
 	hwc->last_tag = ++cpuc->tags[i];
 
-	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
+	if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
 		hwc->config_base = 0;
 		hwc->event_base	= 0;
-	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
+	} else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
-		hwc->event_base_rdpmc = (hwc->idx - X86_PMC_IDX_FIXED) | 1<<30;
+		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
+		hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
 	} else {
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
@@ -921,7 +921,7 @@ int x86_perf_event_set_period(struct perf_event *event)
 	s64 period = hwc->sample_period;
 	int ret = 0, idx = hwc->idx;
 
-	if (idx == X86_PMC_IDX_FIXED_BTS)
+	if (idx == INTEL_PMC_IDX_FIXED_BTS)
 		return 0;
 
 	/*
@@ -1338,21 +1338,21 @@ static int __init init_hw_perf_events(void)
 	for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
 		quirk->func();
 
-	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
+	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
-		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
+		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
+		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
 	}
 	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
 
-	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
+	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
-		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
+		     x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
+		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
 	}
 
 	x86_pmu.intel_ctrl |=
-		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
+		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
 
 	perf_events_lapic_init();
 	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
@@ -1368,7 +1368,7 @@ static int __init init_hw_perf_events(void)
 		 */
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
 			if (c->cmask != X86_RAW_EVENT_MASK
-			    || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
+			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
 				continue;
 			}
 
@@ -1611,8 +1611,8 @@ static int x86_pmu_event_idx(struct perf_event *event)
 	if (!x86_pmu.attr_rdpmc)
 		return 0;
 
-	if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
-		idx -= X86_PMC_IDX_FIXED;
+	if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
+		idx -= INTEL_PMC_IDX_FIXED;
 		idx |= 1 << 30;
 	}
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 8408e37f5fa4..5b0b362c7ae6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -747,7 +747,7 @@ static void intel_pmu_disable_all(void)
 
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 
-	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
 
 	intel_pmu_pebs_disable_all();
@@ -763,9 +763,9 @@ static void intel_pmu_enable_all(int added)
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
 			x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
 
-	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
 		struct perf_event *event =
-			cpuc->events[X86_PMC_IDX_FIXED_BTS];
+			cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
 
 		if (WARN_ON_ONCE(!event))
 			return;
@@ -871,7 +871,7 @@ static inline void intel_pmu_ack_status(u64 ack)
 
 static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
 {
-	int idx = hwc->idx - X86_PMC_IDX_FIXED;
+	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
 
 	mask = 0xfULL << (idx * 4);
@@ -886,7 +886,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
+	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
 		intel_pmu_disable_bts();
 		intel_pmu_drain_bts_buffer();
 		return;
@@ -915,7 +915,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
 {
-	int idx = hwc->idx - X86_PMC_IDX_FIXED;
+	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
 	u64 ctrl_val, bits, mask;
 
 	/*
@@ -949,7 +949,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
+	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
 		if (!__this_cpu_read(cpu_hw_events.enabled))
 			return;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 026373edef7f..629ae0b7ad90 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -248,7 +248,7 @@ void reserve_ds_buffers(void)
  */
 
 struct event_constraint bts_constraint =
-	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
 
 void intel_pmu_enable_bts(u64 config)
 {
@@ -295,7 +295,7 @@ int intel_pmu_drain_bts_buffer(void)
 		u64	to;
 		u64	flags;
 	};
-	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
+	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
 	struct bts_record *at, *top;
 	struct perf_output_handle handle;
 	struct perf_event_header header;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 6c82e4037989..92c7e39a079f 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1325,7 +1325,7 @@ __init int p4_pmu_init(void)
 	unsigned int low, high;
 
 	/* If we get stripped -- indexing fails */
-	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
+	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
 
 	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
 	if (!(low & (1 << 7))) {
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 2e88438ffd83..9b7ec1150ab0 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -80,10 +80,10 @@ static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
 
 static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
 {
-	if (idx < X86_PMC_IDX_FIXED)
+	if (idx < INTEL_PMC_IDX_FIXED)
 		return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
 	else
-		return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED);
+		return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED);
 }
 
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu)
@@ -291,7 +291,7 @@ static void reprogram_idx(struct kvm_pmu *pmu, int idx)
 	if (pmc_is_gp(pmc))
 		reprogram_gp_counter(pmc, pmc->eventsel);
 	else {
-		int fidx = idx - X86_PMC_IDX_FIXED;
+		int fidx = idx - INTEL_PMC_IDX_FIXED;
 		reprogram_fixed_counter(pmc,
 				fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
 	}
@@ -452,7 +452,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
 		return;
 
 	pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
-			X86_PMC_MAX_GENERIC);
+			INTEL_PMC_MAX_GENERIC);
 	pmu->counter_bitmask[KVM_PMC_GP] =
 		((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
 	bitmap_len = (entry->eax >> 24) & 0xff;
@@ -462,13 +462,13 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
 		pmu->nr_arch_fixed_counters = 0;
 	} else {
 		pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
-				X86_PMC_MAX_FIXED);
+				INTEL_PMC_MAX_FIXED);
 		pmu->counter_bitmask[KVM_PMC_FIXED] =
 			((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
 	}
 
 	pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
-		(((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED);
+		(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
 	pmu->global_ctrl_mask = ~pmu->global_ctrl;
 }
 
@@ -478,15 +478,15 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
 
 	memset(pmu, 0, sizeof(*pmu));
-	for (i = 0; i < X86_PMC_MAX_GENERIC; i++) {
+	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
 		pmu->gp_counters[i].type = KVM_PMC_GP;
 		pmu->gp_counters[i].vcpu = vcpu;
 		pmu->gp_counters[i].idx = i;
 	}
-	for (i = 0; i < X86_PMC_MAX_FIXED; i++) {
+	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
 		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
 		pmu->fixed_counters[i].vcpu = vcpu;
-		pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED;
+		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
 	}
 	init_irq_work(&pmu->irq_work, trigger_pmi);
 	kvm_pmu_cpuid_update(vcpu);
@@ -498,13 +498,13 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu)
 	int i;
 
 	irq_work_sync(&pmu->irq_work);
-	for (i = 0; i < X86_PMC_MAX_GENERIC; i++) {
+	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
 		struct kvm_pmc *pmc = &pmu->gp_counters[i];
 		stop_counter(pmc);
 		pmc->counter = pmc->eventsel = 0;
 	}
 
-	for (i = 0; i < X86_PMC_MAX_FIXED; i++)
+	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
 		stop_counter(&pmu->fixed_counters[i]);
 
 	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
-- 
cgit v1.2.3


From a1eac7ac903ea9afbd4f133659710a0588c8eca5 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 20 Jun 2012 20:46:34 +0200
Subject: perf/x86: Move Intel specific code to intel_pmu_init()

There is some Intel specific code in the generic x86 path. Move it to
intel_pmu_init().

Since p4 and p6 pmus don't have fixed counters we may skip the check
in case such a pmu is detected.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340217996-2254-3-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c       | 34 ++--------------------------------
 arch/x86/kernel/cpu/perf_event_intel.c | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 668050002602..7b4f1e871f7c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1307,7 +1307,6 @@ static struct attribute_group x86_pmu_format_group = {
 static int __init init_hw_perf_events(void)
 {
 	struct x86_pmu_quirk *quirk;
-	struct event_constraint *c;
 	int err;
 
 	pr_info("Performance Events: ");
@@ -1338,21 +1337,8 @@ static int __init init_hw_perf_events(void)
 	for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
 		quirk->func();
 
-	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
-		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
-		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
-	}
-	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
-
-	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
-		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-		     x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
-		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
-	}
-
-	x86_pmu.intel_ctrl |=
-		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+	if (!x86_pmu.intel_ctrl)
+		x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
 
 	perf_events_lapic_init();
 	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
@@ -1361,22 +1347,6 @@ static int __init init_hw_perf_events(void)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
 				   0, x86_pmu.num_counters, 0);
 
-	if (x86_pmu.event_constraints) {
-		/*
-		 * event on fixed counter2 (REF_CYCLES) only works on this
-		 * counter, so do not extend mask to generic counters
-		 */
-		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != X86_RAW_EVENT_MASK
-			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
-				continue;
-			}
-
-			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
-			c->weight += x86_pmu.num_counters;
-		}
-	}
-
 	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 5b0b362c7ae6..2e9444c80148 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1765,6 +1765,7 @@ __init int intel_pmu_init(void)
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
 	union cpuid10_ebx ebx;
+	struct event_constraint *c;
 	unsigned int unused;
 	int version;
 
@@ -1953,5 +1954,37 @@ __init int intel_pmu_init(void)
 		}
 	}
 
+	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
+		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
+		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
+	}
+	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+
+	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
+		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+		     x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
+		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
+	}
+
+	x86_pmu.intel_ctrl |=
+		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+
+	if (x86_pmu.event_constraints) {
+		/*
+		 * event on fixed counter2 (REF_CYCLES) only works on this
+		 * counter, so do not extend mask to generic counters
+		 */
+		for_each_event_constraint(c, x86_pmu.event_constraints) {
+			if (c->cmask != X86_RAW_EVENT_MASK
+			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+				continue;
+			}
+
+			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+			c->weight += x86_pmu.num_counters;
+		}
+	}
+
 	return 0;
 }
-- 
cgit v1.2.3


From b1dc3c4820428ac6216537416b2fcd140fdc52e5 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 20 Jun 2012 20:46:35 +0200
Subject: perf/x86/amd: Unify AMD's generic and family 15h pmus

There is no need for keeping separate pmu structs. We can enable
amd_{get,put}_event_constraints() functions also for family 15h event.

The advantage is that there is only a single pmu struct for all AMD
cpus. This patch introduces functions to setup the pmu to enabe core
performance counters or counter constraints.

Also, cpuid checks are used instead of family checks where
possible. Thus, it enables the code independently of cpu families if
the feature flag is set.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340217996-2254-4-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/perf_event.h    |   3 +-
 arch/x86/kernel/cpu/perf_event_amd.c | 103 ++++++++++++++++-------------------
 arch/x86/oprofile/op_model_amd.c     |   4 +-
 3 files changed, 49 insertions(+), 61 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 3b31248caf60..ffdf5e0991c4 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -47,8 +47,7 @@
 	(X86_RAW_EVENT_MASK          |  \
 	 AMD64_EVENTSEL_EVENT)
 #define AMD64_NUM_COUNTERS				4
-#define AMD64_NUM_COUNTERS_F15H				6
-#define AMD64_NUM_COUNTERS_MAX				AMD64_NUM_COUNTERS_F15H
+#define AMD64_NUM_COUNTERS_CORE				6
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 11a4eb9131d5..4528ae7b6ec4 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -366,7 +366,7 @@ static void amd_pmu_cpu_starting(int cpu)
 
 	cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
 
-	if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15)
+	if (boot_cpu_data.x86_max_cores < 2)
 		return;
 
 	nb_id = amd_get_nb_id(cpu);
@@ -422,35 +422,6 @@ static struct attribute *amd_format_attr[] = {
 	NULL,
 };
 
-static __initconst const struct x86_pmu amd_pmu = {
-	.name			= "AMD",
-	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
-	.enable_all		= x86_pmu_enable_all,
-	.enable			= x86_pmu_enable_event,
-	.disable		= x86_pmu_disable_event,
-	.hw_config		= amd_pmu_hw_config,
-	.schedule_events	= x86_schedule_events,
-	.eventsel		= MSR_K7_EVNTSEL0,
-	.perfctr		= MSR_K7_PERFCTR0,
-	.event_map		= amd_pmu_event_map,
-	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= AMD64_NUM_COUNTERS,
-	.cntval_bits		= 48,
-	.cntval_mask		= (1ULL << 48) - 1,
-	.apic			= 1,
-	/* use highest bit to detect overflow */
-	.max_period		= (1ULL << 47) - 1,
-	.get_event_constraints	= amd_get_event_constraints,
-	.put_event_constraints	= amd_put_event_constraints,
-
-	.format_attrs		= amd_format_attr,
-
-	.cpu_prepare		= amd_pmu_cpu_prepare,
-	.cpu_starting		= amd_pmu_cpu_starting,
-	.cpu_dead		= amd_pmu_cpu_dead,
-};
-
 /* AMD Family 15h */
 
 #define AMD_EVENT_TYPE_MASK	0x000000F0ULL
@@ -597,8 +568,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
 	}
 }
 
-static __initconst const struct x86_pmu amd_pmu_f15h = {
-	.name			= "AMD Family 15h",
+static __initconst const struct x86_pmu amd_pmu = {
+	.name			= "AMD",
 	.handle_irq		= x86_pmu_handle_irq,
 	.disable_all		= x86_pmu_disable_all,
 	.enable_all		= x86_pmu_enable_all,
@@ -606,50 +577,68 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
 	.disable		= x86_pmu_disable_event,
 	.hw_config		= amd_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
-	.eventsel		= MSR_F15H_PERF_CTL,
-	.perfctr		= MSR_F15H_PERF_CTR,
+	.eventsel		= MSR_K7_EVNTSEL0,
+	.perfctr		= MSR_K7_PERFCTR0,
 	.event_map		= amd_pmu_event_map,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= AMD64_NUM_COUNTERS_F15H,
+	.num_counters		= AMD64_NUM_COUNTERS,
 	.cntval_bits		= 48,
 	.cntval_mask		= (1ULL << 48) - 1,
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
-	.get_event_constraints	= amd_get_event_constraints_f15h,
-	/* nortbridge counters not yet implemented: */
-#if 0
+	.get_event_constraints	= amd_get_event_constraints,
 	.put_event_constraints	= amd_put_event_constraints,
 
+	.format_attrs		= amd_format_attr,
+
 	.cpu_prepare		= amd_pmu_cpu_prepare,
-	.cpu_dead		= amd_pmu_cpu_dead,
-#endif
 	.cpu_starting		= amd_pmu_cpu_starting,
-	.format_attrs		= amd_format_attr,
+	.cpu_dead		= amd_pmu_cpu_dead,
 };
 
+static int setup_event_constraints(void)
+{
+	if (boot_cpu_data.x86 >= 0x15)
+		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
+	return 0;
+}
+
+static int setup_perfctr_core(void)
+{
+	if (!cpu_has_perfctr_core) {
+		WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
+		     KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
+		return -ENODEV;
+	}
+
+	WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
+	     KERN_ERR "hw perf events core counters need constraints handler!");
+
+	/*
+	 * If core performance counter extensions exists, we must use
+	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
+	 * x86_pmu_addr_offset().
+	 */
+	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
+	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
+	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
+
+	printk(KERN_INFO "perf: AMD core performance counters detected\n");
+
+	return 0;
+}
+
 __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
 	if (boot_cpu_data.x86 < 6)
 		return -ENODEV;
 
-	/*
-	 * If core performance counter extensions exists, it must be
-	 * family 15h, otherwise fail. See x86_pmu_addr_offset().
-	 */
-	switch (boot_cpu_data.x86) {
-	case 0x15:
-		if (!cpu_has_perfctr_core)
-			return -ENODEV;
-		x86_pmu = amd_pmu_f15h;
-		break;
-	default:
-		if (cpu_has_perfctr_core)
-			return -ENODEV;
-		x86_pmu = amd_pmu;
-		break;
-	}
+	x86_pmu = amd_pmu;
+
+	setup_event_constraints();
+	setup_perfctr_core();
 
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 303f08637826..b2b94438ff05 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -312,7 +312,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
 			goto fail;
 		}
 		/* both registers must be reserved */
-		if (num_counters == AMD64_NUM_COUNTERS_F15H) {
+		if (num_counters == AMD64_NUM_COUNTERS_CORE) {
 			msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
 			msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
 		} else {
@@ -514,7 +514,7 @@ static int op_amd_init(struct oprofile_operations *ops)
 	ops->create_files = setup_ibs_files;
 
 	if (boot_cpu_data.x86 == 0x15) {
-		num_counters = AMD64_NUM_COUNTERS_F15H;
+		num_counters = AMD64_NUM_COUNTERS_CORE;
 	} else {
 		num_counters = AMD64_NUM_COUNTERS;
 	}
-- 
cgit v1.2.3


From f285f92f7e4c9af20149130c8fd5027131b39b0e Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 20 Jun 2012 20:46:36 +0200
Subject: perf/x86: Improve debug output in check_hw_exists()

It might be of interest which perfctr msr failed.

Signed-off-by: Robert Richter <robert.richter@amd.com>
[ added hunk to avoid GCC warn ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340217996-2254-5-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 7b4f1e871f7c..3eb88ebcec5a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -178,7 +178,7 @@ static void release_pmc_hardware(void) {}
 
 static bool check_hw_exists(void)
 {
-	u64 val, val_new = 0;
+	u64 val, val_new = ~0;
 	int i, reg, ret = 0;
 
 	/*
@@ -211,8 +211,9 @@ static bool check_hw_exists(void)
 	 * that don't trap on the MSR access and always return 0s.
 	 */
 	val = 0xabcdUL;
-	ret = wrmsrl_safe(x86_pmu_event_addr(0), val);
-	ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
+	reg = x86_pmu_event_addr(0);
+	ret = wrmsrl_safe(reg, val);
+	ret |= rdmsrl_safe(reg, &val_new);
 	if (ret || val != val_new)
 		goto msr_fail;
 
@@ -229,6 +230,7 @@ bios_fail:
 
 msr_fail:
 	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+	printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new);
 
 	return false;
 }
-- 
cgit v1.2.3


From c93dc84cbe32435be3ffa2fbde355eff94955c32 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 Jun 2012 14:50:50 +0200
Subject: perf/x86: Add a microcode revision check for SNB-PEBS

Recent Intel microcode resolved the SNB-PEBS issues, so conditionally
enable PEBS on SNB hardware depending on the microcode revision.

Thanks to Stephane for figuring out the various microcode revisions.

Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-v3672ziwh9damwqwh1uz3krm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/perf_event.h      |  2 ++
 arch/x86/kernel/cpu/perf_event.c       | 21 +++++++++-----
 arch/x86/kernel/cpu/perf_event.h       |  4 ++-
 arch/x86/kernel/cpu/perf_event_intel.c | 51 ++++++++++++++++++++++++++++++++--
 arch/x86/kernel/microcode_core.c       | 10 +++++--
 5 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index ffdf5e0991c4..c78f14a0df00 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -232,6 +232,7 @@ struct perf_guest_switch_msr {
 
 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
+extern void perf_check_microcode(void);
 #else
 static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 {
@@ -245,6 +246,7 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 }
 
 static inline void perf_events_lapic_init(void)	{ }
+static inline void perf_check_microcode(void) { }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3eb88ebcec5a..29557aa06dda 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -379,7 +379,7 @@ int x86_pmu_hw_config(struct perf_event *event)
 		int precise = 0;
 
 		/* Support for constant skid */
-		if (x86_pmu.pebs_active) {
+		if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
 			precise++;
 
 			/* Support for IP fixup */
@@ -1650,13 +1650,20 @@ static void x86_pmu_flush_branch_stack(void)
 		x86_pmu.flush_branch_stack();
 }
 
+void perf_check_microcode(void)
+{
+	if (x86_pmu.check_microcode)
+		x86_pmu.check_microcode();
+}
+EXPORT_SYMBOL_GPL(perf_check_microcode);
+
 static struct pmu pmu = {
 	.pmu_enable		= x86_pmu_enable,
 	.pmu_disable		= x86_pmu_disable,
 
-	.attr_groups	= x86_pmu_attr_groups,
+	.attr_groups		= x86_pmu_attr_groups,
 
-	.event_init	= x86_pmu_event_init,
+	.event_init		= x86_pmu_event_init,
 
 	.add			= x86_pmu_add,
 	.del			= x86_pmu_del,
@@ -1664,11 +1671,11 @@ static struct pmu pmu = {
 	.stop			= x86_pmu_stop,
 	.read			= x86_pmu_read,
 
-	.start_txn	= x86_pmu_start_txn,
-	.cancel_txn	= x86_pmu_cancel_txn,
-	.commit_txn	= x86_pmu_commit_txn,
+	.start_txn		= x86_pmu_start_txn,
+	.cancel_txn		= x86_pmu_cancel_txn,
+	.commit_txn		= x86_pmu_commit_txn,
 
-	.event_idx	= x86_pmu_event_idx,
+	.event_idx		= x86_pmu_event_idx,
 	.flush_branch_stack	= x86_pmu_flush_branch_stack,
 };
 
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 83238f2a12b2..3f5c66904355 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -361,6 +361,8 @@ struct x86_pmu {
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+
+	void		(*check_microcode)(void);
 	void		(*flush_branch_stack)(void);
 
 	/*
@@ -373,7 +375,7 @@ struct x86_pmu {
 	 * Intel DebugStore bits
 	 */
 	int		bts, pebs;
-	int		bts_active, pebs_active;
+	int		bts_active, pebs_active, pebs_broken;
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2e9444c80148..5fdedb4bc3f1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1712,11 +1712,56 @@ static __init void intel_clovertown_quirk(void)
 	x86_pmu.pebs_constraints = NULL;
 }
 
+static int intel_snb_pebs_broken(int cpu)
+{
+	u32 rev = UINT_MAX; /* default to broken for unknown models */
+
+	switch (cpu_data(cpu).x86_model) {
+	case 42: /* SNB */
+		rev = 0x28;
+		break;
+
+	case 45: /* SNB-EP */
+		switch (cpu_data(cpu).x86_mask) {
+		case 6: rev = 0x618; break;
+		case 7: rev = 0x70c; break;
+		}
+	}
+
+	return (cpu_data(cpu).microcode < rev);
+}
+
+static void intel_snb_check_microcode(void)
+{
+	int pebs_broken = 0;
+	int cpu;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		if ((pebs_broken = intel_snb_pebs_broken(cpu)))
+			break;
+	}
+	put_online_cpus();
+
+	if (pebs_broken == x86_pmu.pebs_broken)
+		return;
+
+	/*
+	 * Serialized by the microcode lock..
+	 */
+	if (x86_pmu.pebs_broken) {
+		pr_info("PEBS enabled due to microcode update\n");
+		x86_pmu.pebs_broken = 0;
+	} else {
+		pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
+		x86_pmu.pebs_broken = 1;
+	}
+}
+
 static __init void intel_sandybridge_quirk(void)
 {
-	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
-	x86_pmu.pebs = 0;
-	x86_pmu.pebs_constraints = NULL;
+	x86_pmu.check_microcode = intel_snb_check_microcode;
+	intel_snb_check_microcode();
 }
 
 static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 947e4c64b1d8..1649cf899ad6 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -87,6 +87,7 @@
 #include <asm/microcode.h>
 #include <asm/processor.h>
 #include <asm/cpu_device_id.h>
+#include <asm/perf_event.h>
 
 MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -277,7 +278,6 @@ static int reload_for_cpu(int cpu)
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	int err = 0;
 
-	mutex_lock(&microcode_mutex);
 	if (uci->valid) {
 		enum ucode_state ustate;
 
@@ -288,7 +288,6 @@ static int reload_for_cpu(int cpu)
 			if (ustate == UCODE_ERROR)
 				err = -EINVAL;
 	}
-	mutex_unlock(&microcode_mutex);
 
 	return err;
 }
@@ -309,6 +308,7 @@ static ssize_t reload_store(struct device *dev,
 		return size;
 
 	get_online_cpus();
+	mutex_lock(&microcode_mutex);
 	for_each_online_cpu(cpu) {
 		tmp_ret = reload_for_cpu(cpu);
 		if (tmp_ret != 0)
@@ -318,6 +318,9 @@ static ssize_t reload_store(struct device *dev,
 		if (!ret)
 			ret = tmp_ret;
 	}
+	if (!ret)
+		perf_check_microcode();
+	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
 
 	if (!ret)
@@ -557,7 +560,8 @@ static int __init microcode_init(void)
 	mutex_lock(&microcode_mutex);
 
 	error = subsys_interface_register(&mc_cpu_interface);
-
+	if (!error)
+		perf_check_microcode();
 	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
 
-- 
cgit v1.2.3


From 3e0091e2b6f8cd59e567f247e345a3a6ad1f6e7e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 26 Jun 2012 23:38:39 +0200
Subject: perf/x86: Save a few bytes in 'struct x86_pmu'

All these are basically boolean flags, use a bitfield to save a few
bytes.

Suggested-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-vsevd5g8lhcn129n3s7trl7r@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3f5c66904355..a15df4be151f 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -374,8 +374,11 @@ struct x86_pmu {
 	/*
 	 * Intel DebugStore bits
 	 */
-	int		bts, pebs;
-	int		bts_active, pebs_active, pebs_broken;
+	int		bts		:1,
+			bts_active	:1,
+			pebs		:1,
+			pebs_active	:1,
+			pebs_broken	:1;
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
-- 
cgit v1.2.3


From eca26c9950f4d3e9c92ba275e9f4aee834aa1913 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 27 Jun 2012 15:09:12 +0800
Subject: perf/x86: Use 0xff as pseudo code for fixed uncore event

Stephane Eranian suggestted using 0xff as pseudo code for fixed
uncore event and using the umask value to determine which of the
fixed events we want to map to. So far there is at most one fixed
counter in a uncore PMU. So just change the definition of
UNCORE_FIXED_EVENT to 0xff.

Suggested-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340780953-21130-1-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 4 ++--
 arch/x86/kernel/cpu/perf_event_intel_uncore.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 6f43f9584e3d..c42a3f7b5233 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -179,7 +179,7 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = {
 };
 
 static struct uncore_event_desc snbep_uncore_imc_events[] = {
-	INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0xff"),
+	INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0x00"),
 	INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
 	INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
 	{ /* end: all zeroes */ },
@@ -616,7 +616,7 @@ static struct attribute_group nhm_uncore_format_group = {
 };
 
 static struct uncore_event_desc nhm_uncore_events[] = {
-	INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0xff"),
+	INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0x00"),
 	INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"),
 	INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"),
 	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"),
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 4d52db0d1dfe..88498c7b3420 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -9,7 +9,7 @@
 
 #define UNCORE_PMU_HRTIMER_INTERVAL	(60 * NSEC_PER_SEC)
 
-#define UNCORE_FIXED_EVENT		0xffff
+#define UNCORE_FIXED_EVENT		0xff
 #define UNCORE_PMC_IDX_MAX_GENERIC	8
 #define UNCORE_PMC_IDX_FIXED		UNCORE_PMC_IDX_MAX_GENERIC
 #define UNCORE_PMC_IDX_MAX		(UNCORE_PMC_IDX_FIXED + 1)
-- 
cgit v1.2.3


From 3b19e4c98c035c9ab218fc64ef26f4f7a30eafb9 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Thu, 28 Jun 2012 14:56:36 +0800
Subject: perf/x86: Fix event constraint for SandyBridge-EP C-Box

The constraint for C-Box event 0x1f should have overlap flag set.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1340866596-22502-2-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index c42a3f7b5233..7d755d2e1c9c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -239,7 +239,7 @@ static struct event_constraint snbep_uncore_cbox_constraints[] = {
 	UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
 	UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
 	UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
-	UNCORE_EVENT_CONSTRAINT(0x1f, 0xe),
+	EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
 	UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
 	UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
 	UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
-- 
cgit v1.2.3


From 42089697244ba8e64fa43fb5e6d50d47a8e4cb00 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 4 Jul 2012 14:00:14 +0800
Subject: perf/x86: Detect number of instances of uncore CBox

The CBox manages the interface between the core and the LLC, so
the instances of uncore CBox is equal to number of cores.

Reported-by: Andrew Cooks <acooks@gmail.com>
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1341381616-12229-4-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 7d755d2e1c9c..4fecbd00ee75 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -1605,8 +1605,9 @@ static void __init uncore_cpu_setup(void *dummy)
 
 static int __init uncore_cpu_init(void)
 {
-	int ret, cpu;
+	int ret, cpu, max_cores;
 
+	max_cores = boot_cpu_data.x86_max_cores;
 	switch (boot_cpu_data.x86_model) {
 	case 26: /* Nehalem */
 	case 30:
@@ -1615,9 +1616,13 @@ static int __init uncore_cpu_init(void)
 		msr_uncores = nhm_msr_uncores;
 		break;
 	case 42: /* Sandy Bridge */
+		if (snb_uncore_cbox.num_boxes > max_cores)
+			snb_uncore_cbox.num_boxes = max_cores;
 		msr_uncores = snb_msr_uncores;
 		break;
 	case 45: /* Sandy Birdge-EP */
+		if (snbep_uncore_cbox.num_boxes > max_cores)
+			snbep_uncore_cbox.num_boxes = max_cores;
 		msr_uncores = snbep_msr_uncores;
 		break;
 	default:
-- 
cgit v1.2.3


From 6a67943a18c264d5f3df436da38edb3e59adc905 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 4 Jul 2012 14:00:15 +0800
Subject: perf/x86: Uncore filter support for SandyBridge-EP

This patch adds C-Box and PCU filter support for SandyBridge-EP
uncore. We can filter C-Box events by thread/core ID and filter
PCU events by frequency/voltage.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1341381616-12229-5-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 228 +++++++++++++++++++++-----
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |  24 ++-
 2 files changed, 206 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 4fecbd00ee75..19faffc60886 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -14,10 +14,13 @@ static cpumask_t uncore_cpu_mask;
 /* constraint for the fixed counter */
 static struct event_constraint constraint_fixed =
 	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
+static struct event_constraint constraint_empty =
+	EVENT_CONSTRAINT(0, 0, 0);
 
 DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
 DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
 DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
 DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
 DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
 DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
@@ -26,8 +29,19 @@ DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
 DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
 DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
 DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31");
 
 /* Sandy Bridge-EP uncore support */
+static struct intel_uncore_type snbep_uncore_cbox;
+static struct intel_uncore_type snbep_uncore_pcu;
+
 static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
 {
 	struct pci_dev *pdev = box->pci_dev;
@@ -120,6 +134,10 @@ static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box,
 					struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+	if (reg1->idx != EXTRA_REG_NONE)
+		wrmsrl(reg1->reg, reg1->config);
 
 	wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
 }
@@ -149,6 +167,71 @@ static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
 		wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
 }
 
+static struct event_constraint *
+snbep_uncore_get_constraint(struct intel_uncore_box *box,
+			    struct perf_event *event)
+{
+	struct intel_uncore_extra_reg *er;
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+	unsigned long flags;
+	bool ok = false;
+
+	if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc))
+		return NULL;
+
+	er = &box->shared_regs[reg1->idx];
+	raw_spin_lock_irqsave(&er->lock, flags);
+	if (!atomic_read(&er->ref) || er->config1 == reg1->config) {
+		atomic_inc(&er->ref);
+		er->config1 = reg1->config;
+		ok = true;
+	}
+	raw_spin_unlock_irqrestore(&er->lock, flags);
+
+	if (ok) {
+		if (box->phys_id >= 0)
+			reg1->alloc = 1;
+		return NULL;
+	}
+	return &constraint_empty;
+}
+
+static void snbep_uncore_put_constraint(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct intel_uncore_extra_reg *er;
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+
+	if (box->phys_id < 0 || !reg1->alloc)
+		return;
+
+	er = &box->shared_regs[reg1->idx];
+	atomic_dec(&er->ref);
+	reg1->alloc = 0;
+}
+
+static int snbep_uncore_hw_config(struct intel_uncore_box *box,
+				  struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+	if (box->pmu->type == &snbep_uncore_cbox) {
+		reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+			SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+		reg1->config = event->attr.config1 &
+			SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK;
+	} else if (box->pmu->type == &snbep_uncore_pcu) {
+		reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
+		reg1->config = event->attr.config1 &
+			SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK;
+	} else {
+		return 0;
+	}
+	reg1->idx = 0;
+	return 0;
+}
+
 static struct attribute *snbep_uncore_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -167,6 +250,20 @@ static struct attribute *snbep_uncore_ubox_formats_attr[] = {
 	NULL,
 };
 
+static struct attribute *snbep_uncore_cbox_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_tid_en.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh8.attr,
+	&format_attr_filter_tid.attr,
+	&format_attr_filter_nid.attr,
+	&format_attr_filter_state.attr,
+	&format_attr_filter_opc.attr,
+	NULL,
+};
+
 static struct attribute *snbep_uncore_pcu_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_occ_sel.attr,
@@ -175,6 +272,10 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = {
 	&format_attr_thresh5.attr,
 	&format_attr_occ_invert.attr,
 	&format_attr_occ_edge.attr,
+	&format_attr_filter_brand0.attr,
+	&format_attr_filter_brand1.attr,
+	&format_attr_filter_brand2.attr,
+	&format_attr_filter_brand3.attr,
 	NULL,
 };
 
@@ -203,6 +304,11 @@ static struct attribute_group snbep_uncore_ubox_format_group = {
 	.attrs = snbep_uncore_ubox_formats_attr,
 };
 
+static struct attribute_group snbep_uncore_cbox_format_group = {
+	.name = "format",
+	.attrs = snbep_uncore_cbox_formats_attr,
+};
+
 static struct attribute_group snbep_uncore_pcu_format_group = {
 	.name = "format",
 	.attrs = snbep_uncore_pcu_formats_attr,
@@ -215,6 +321,9 @@ static struct intel_uncore_ops snbep_uncore_msr_ops = {
 	.disable_event	= snbep_uncore_msr_disable_event,
 	.enable_event	= snbep_uncore_msr_enable_event,
 	.read_counter	= snbep_uncore_msr_read_counter,
+	.get_constraint = snbep_uncore_get_constraint,
+	.put_constraint = snbep_uncore_put_constraint,
+	.hw_config	= snbep_uncore_hw_config,
 };
 
 static struct intel_uncore_ops snbep_uncore_pci_ops = {
@@ -307,31 +416,33 @@ static struct intel_uncore_type snbep_uncore_ubox = {
 };
 
 static struct intel_uncore_type snbep_uncore_cbox = {
-	.name		= "cbox",
-	.num_counters   = 4,
-	.num_boxes	= 8,
-	.perf_ctr_bits	= 44,
-	.event_ctl	= SNBEP_C0_MSR_PMON_CTL0,
-	.perf_ctr	= SNBEP_C0_MSR_PMON_CTR0,
-	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,
-	.box_ctl	= SNBEP_C0_MSR_PMON_BOX_CTL,
-	.msr_offset	= SNBEP_CBO_MSR_OFFSET,
-	.constraints	= snbep_uncore_cbox_constraints,
-	.ops		= &snbep_uncore_msr_ops,
-	.format_group	= &snbep_uncore_format_group,
+	.name			= "cbox",
+	.num_counters		= 4,
+	.num_boxes		= 8,
+	.perf_ctr_bits		= 44,
+	.event_ctl		= SNBEP_C0_MSR_PMON_CTL0,
+	.perf_ctr		= SNBEP_C0_MSR_PMON_CTR0,
+	.event_mask		= SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+	.box_ctl		= SNBEP_C0_MSR_PMON_BOX_CTL,
+	.msr_offset		= SNBEP_CBO_MSR_OFFSET,
+	.num_shared_regs	= 1,
+	.constraints		= snbep_uncore_cbox_constraints,
+	.ops			= &snbep_uncore_msr_ops,
+	.format_group		= &snbep_uncore_cbox_format_group,
 };
 
 static struct intel_uncore_type snbep_uncore_pcu = {
-	.name		= "pcu",
-	.num_counters   = 4,
-	.num_boxes	= 1,
-	.perf_ctr_bits	= 48,
-	.perf_ctr	= SNBEP_PCU_MSR_PMON_CTR0,
-	.event_ctl	= SNBEP_PCU_MSR_PMON_CTL0,
-	.event_mask	= SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
-	.box_ctl	= SNBEP_PCU_MSR_PMON_BOX_CTL,
-	.ops		= &snbep_uncore_msr_ops,
-	.format_group	= &snbep_uncore_pcu_format_group,
+	.name			= "pcu",
+	.num_counters		= 4,
+	.num_boxes		= 1,
+	.perf_ctr_bits		= 48,
+	.perf_ctr		= SNBEP_PCU_MSR_PMON_CTR0,
+	.event_ctl		= SNBEP_PCU_MSR_PMON_CTL0,
+	.event_mask		= SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+	.box_ctl		= SNBEP_PCU_MSR_PMON_BOX_CTL,
+	.num_shared_regs	= 1,
+	.ops			= &snbep_uncore_msr_ops,
+	.format_group		= &snbep_uncore_pcu_format_group,
 };
 
 static struct intel_uncore_type *snbep_msr_uncores[] = {
@@ -747,15 +858,22 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
 	box->hrtimer.function = uncore_pmu_hrtimer;
 }
 
-struct intel_uncore_box *uncore_alloc_box(int cpu)
+struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
+					  int cpu)
 {
 	struct intel_uncore_box *box;
+	int i, size;
 
-	box = kmalloc_node(sizeof(*box), GFP_KERNEL | __GFP_ZERO,
-			   cpu_to_node(cpu));
+	size = sizeof(*box) + type->num_shared_regs *
+		sizeof(struct intel_uncore_extra_reg);
+
+	box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
 	if (!box)
 		return NULL;
 
+	for (i = 0; i < type->num_shared_regs; i++)
+		raw_spin_lock_init(&box->shared_regs[i].lock);
+
 	uncore_pmu_init_hrtimer(box);
 	atomic_set(&box->refcnt, 1);
 	box->cpu = -1;
@@ -834,11 +952,18 @@ static int uncore_collect_events(struct intel_uncore_box *box,
 }
 
 static struct event_constraint *
-uncore_event_constraint(struct intel_uncore_type *type,
-			struct perf_event *event)
+uncore_get_event_constraint(struct intel_uncore_box *box,
+			    struct perf_event *event)
 {
+	struct intel_uncore_type *type = box->pmu->type;
 	struct event_constraint *c;
 
+	if (type->ops->get_constraint) {
+		c = type->ops->get_constraint(box, event);
+		if (c)
+			return c;
+	}
+
 	if (event->hw.config == ~0ULL)
 		return &constraint_fixed;
 
@@ -852,19 +977,25 @@ uncore_event_constraint(struct intel_uncore_type *type,
 	return &type->unconstrainted;
 }
 
+static void uncore_put_event_constraint(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	if (box->pmu->type->ops->put_constraint)
+		box->pmu->type->ops->put_constraint(box, event);
+}
+
 static int uncore_assign_events(struct intel_uncore_box *box,
 				int assign[], int n)
 {
 	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
 	struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
-	int i, ret, wmin, wmax;
+	int i, wmin, wmax, ret = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
 
 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-		c = uncore_event_constraint(box->pmu->type,
-				box->event_list[i]);
+		c = uncore_get_event_constraint(box, box->event_list[i]);
 		constraints[i] = c;
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
@@ -888,13 +1019,17 @@ static int uncore_assign_events(struct intel_uncore_box *box,
 			break;
 
 		__set_bit(hwc->idx, used_mask);
-		assign[i] = hwc->idx;
+		if (assign)
+			assign[i] = hwc->idx;
 	}
-	if (i == n)
-		return 0;
-
 	/* slow path */
-	ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+	if (i != n)
+		ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+
+	if (!assign || ret) {
+		for (i = 0; i < n; i++)
+			uncore_put_event_constraint(box, box->event_list[i]);
+	}
 	return ret ? -EINVAL : 0;
 }
 
@@ -1021,6 +1156,8 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags)
 
 	for (i = 0; i < box->n_events; i++) {
 		if (event == box->event_list[i]) {
+			uncore_put_event_constraint(box, event);
+
 			while (++i < box->n_events)
 				box->event_list[i - 1] = box->event_list[i];
 
@@ -1048,10 +1185,9 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
 {
 	struct perf_event *leader = event->group_leader;
 	struct intel_uncore_box *fake_box;
-	int assign[UNCORE_PMC_IDX_MAX];
 	int ret = -EINVAL, n;
 
-	fake_box = uncore_alloc_box(smp_processor_id());
+	fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
 	if (!fake_box)
 		return -ENOMEM;
 
@@ -1073,7 +1209,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
 
 	fake_box->n_events = n;
 
-	ret = uncore_assign_events(fake_box, assign, n);
+	ret = uncore_assign_events(fake_box, NULL, n);
 out:
 	kfree(fake_box);
 	return ret;
@@ -1117,6 +1253,10 @@ int uncore_pmu_event_init(struct perf_event *event)
 		return -EINVAL;
 	event->cpu = box->cpu;
 
+	event->hw.idx = -1;
+	event->hw.last_tag = ~0ULL;
+	event->hw.extra_reg.idx = EXTRA_REG_NONE;
+
 	if (event->attr.config == UNCORE_FIXED_EVENT) {
 		/* no fixed counter */
 		if (!pmu->type->fixed_ctl)
@@ -1130,11 +1270,13 @@ int uncore_pmu_event_init(struct perf_event *event)
 		hwc->config = ~0ULL;
 	} else {
 		hwc->config = event->attr.config & pmu->type->event_mask;
+		if (pmu->type->ops->hw_config) {
+			ret = pmu->type->ops->hw_config(box, event);
+			if (ret)
+				return ret;
+		}
 	}
 
-	event->hw.idx = -1;
-	event->hw.last_tag = ~0ULL;
-
 	if (event->group_leader != event)
 		ret = uncore_validate_group(pmu, event);
 	else
@@ -1276,7 +1418,7 @@ static int __devinit uncore_pci_add(struct intel_uncore_type *type,
 	if (phys_id < 0)
 		return -ENODEV;
 
-	box = uncore_alloc_box(0);
+	box = uncore_alloc_box(type, 0);
 	if (!box)
 		return -ENOMEM;
 
@@ -1458,7 +1600,7 @@ static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
 			if (pmu->func_id < 0)
 				pmu->func_id = j;
 
-			box = uncore_alloc_box(cpu);
+			box = uncore_alloc_box(type, cpu);
 			if (!box)
 				return -ENOMEM;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 88498c7b3420..b13e9ea81def 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -97,6 +97,10 @@
 				 SNBEP_PMON_CTL_INVERT | \
 				 SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
 
+#define SNBEP_CBO_PMON_CTL_TID_EN		(1 << 19)
+#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK	(SNBEP_PMON_RAW_EVENT_MASK | \
+						 SNBEP_CBO_PMON_CTL_TID_EN)
+
 /* SNB-EP PCU event control */
 #define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK	0x0000c000
 #define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK	0x1f000000
@@ -140,15 +144,17 @@
 /* SNB-EP Cbo register */
 #define SNBEP_C0_MSR_PMON_CTR0			0xd16
 #define SNBEP_C0_MSR_PMON_CTL0			0xd10
-#define SNBEP_C0_MSR_PMON_BOX_FILTER		0xd14
 #define SNBEP_C0_MSR_PMON_BOX_CTL		0xd04
+#define SNBEP_C0_MSR_PMON_BOX_FILTER		0xd14
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK	0xfffffc1f
 #define SNBEP_CBO_MSR_OFFSET			0x20
 
 /* SNB-EP PCU register */
 #define SNBEP_PCU_MSR_PMON_CTR0			0xc36
 #define SNBEP_PCU_MSR_PMON_CTL0			0xc30
-#define SNBEP_PCU_MSR_PMON_BOX_FILTER		0xc34
 #define SNBEP_PCU_MSR_PMON_BOX_CTL		0xc24
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER		0xc34
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK	0xffffffff
 #define SNBEP_PCU_MSR_CORE_C3_CTR		0x3fc
 #define SNBEP_PCU_MSR_CORE_C6_CTR		0x3fd
 
@@ -163,7 +169,6 @@ struct intel_uncore_type {
 	int num_boxes;
 	int perf_ctr_bits;
 	int fixed_ctr_bits;
-	int single_fixed;
 	unsigned perf_ctr;
 	unsigned event_ctl;
 	unsigned event_mask;
@@ -171,6 +176,8 @@ struct intel_uncore_type {
 	unsigned fixed_ctl;
 	unsigned box_ctl;
 	unsigned msr_offset;
+	unsigned num_shared_regs:8;
+	unsigned single_fixed:1;
 	struct event_constraint unconstrainted;
 	struct event_constraint *constraints;
 	struct intel_uncore_pmu *pmus;
@@ -188,6 +195,10 @@ struct intel_uncore_ops {
 	void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
 	void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
 	u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
+	int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
+	struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
+						   struct perf_event *);
+	void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
 };
 
 struct intel_uncore_pmu {
@@ -200,6 +211,12 @@ struct intel_uncore_pmu {
 	struct list_head box_list;
 };
 
+struct intel_uncore_extra_reg {
+	raw_spinlock_t lock;
+	u64 config1;
+	atomic_t ref;
+};
+
 struct intel_uncore_box {
 	int phys_id;
 	int n_active;	/* number of active events */
@@ -215,6 +232,7 @@ struct intel_uncore_box {
 	struct intel_uncore_pmu *pmu;
 	struct hrtimer hrtimer;
 	struct list_head list;
+	struct intel_uncore_extra_reg shared_regs[0];
 };
 
 #define UNCORE_BOX_FLAG_INITIATED	0
-- 
cgit v1.2.3


From 2d3f6ccc4ea5c74d4b4af1b47c56b4cff4bbfcb7 Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <simon.wunderlich@s2003.tu-chemnitz.de>
Date: Wed, 4 Jul 2012 20:38:19 +0200
Subject: batman-adv: check incoming packet type for bla

If the gateway functionality is used, some broadcast packets (DHCP
requests) may be transmitted as unicast packets. As the bridge loop
avoidance code now only considers the payload Ethernet destination,
it may drop the DHCP request for clients which are claimed by other
backbone gateways, because it falsely infers from the broadcast address
that the right backbone gateway should havehandled the broadcast.

Fix this by checking and delegating the batman-adv packet type used
for transmission.

Reported-by: Guido Iribarren <guidoiribarren@buenosaireslibre.org>
Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
---
 net/batman-adv/bridge_loop_avoidance.c | 15 +++++++++++----
 net/batman-adv/bridge_loop_avoidance.h |  5 +++--
 net/batman-adv/soft-interface.c        |  6 +++++-
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 8bf97515a77d..c5863f499133 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1351,6 +1351,7 @@ void bla_free(struct bat_priv *bat_priv)
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: the frame to be checked
  * @vid: the VLAN ID of the frame
+ * @is_bcast: the packet came in a broadcast packet type.
  *
  * bla_rx avoidance checks if:
  *  * we have to race for a claim
@@ -1361,7 +1362,8 @@ void bla_free(struct bat_priv *bat_priv)
  * process the skb.
  *
  */
-int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid)
+int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid,
+	   bool is_bcast)
 {
 	struct ethhdr *ethhdr;
 	struct claim search_claim, *claim = NULL;
@@ -1380,7 +1382,7 @@ int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid)
 
 	if (unlikely(atomic_read(&bat_priv->bla_num_requests)))
 		/* don't allow broadcasts while requests are in flight */
-		if (is_multicast_ether_addr(ethhdr->h_dest))
+		if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast)
 			goto handled;
 
 	memcpy(search_claim.addr, ethhdr->h_source, ETH_ALEN);
@@ -1406,8 +1408,13 @@ int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid)
 	}
 
 	/* if it is a broadcast ... */
-	if (is_multicast_ether_addr(ethhdr->h_dest)) {
-		/* ... drop it. the responsible gateway is in charge. */
+	if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) {
+		/* ... drop it. the responsible gateway is in charge.
+		 *
+		 * We need to check is_bcast because with the gateway
+		 * feature, broadcasts (like DHCP requests) may be sent
+		 * using a unicast packet type.
+		 */
 		goto handled;
 	} else {
 		/* seems the client considers us as its best gateway.
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index e39f93acc28f..dc5227b398d4 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -23,7 +23,8 @@
 #define _NET_BATMAN_ADV_BLA_H_
 
 #ifdef CONFIG_BATMAN_ADV_BLA
-int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid);
+int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid,
+	   bool is_bcast);
 int bla_tx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid);
 int bla_is_backbone_gw(struct sk_buff *skb,
 		       struct orig_node *orig_node, int hdr_size);
@@ -41,7 +42,7 @@ void bla_free(struct bat_priv *bat_priv);
 #else /* ifdef CONFIG_BATMAN_ADV_BLA */
 
 static inline int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb,
-			 short vid)
+			 short vid, bool is_bcast)
 {
 	return 0;
 }
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 6e2530b02043..a0ec0e4ada4c 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -256,7 +256,11 @@ void interface_rx(struct net_device *soft_iface,
 	struct bat_priv *bat_priv = netdev_priv(soft_iface);
 	struct ethhdr *ethhdr;
 	struct vlan_ethhdr *vhdr;
+	struct batman_header *batadv_header = (struct batman_header *)skb->data;
 	short vid __maybe_unused = -1;
+	bool is_bcast;
+
+	is_bcast = (batadv_header->packet_type == BAT_BCAST);
 
 	/* check if enough space is available for pulling, and pull */
 	if (!pskb_may_pull(skb, hdr_size))
@@ -302,7 +306,7 @@ void interface_rx(struct net_device *soft_iface,
 	/* Let the bridge loop avoidance check the packet. If will
 	 * not handle it, we can safely push it up.
 	 */
-	if (bla_rx(bat_priv, skb, vid))
+	if (bla_rx(bat_priv, skb, vid, is_bcast))
 		goto out;
 
 	netif_rx(skb);
-- 
cgit v1.2.3


From 326f418b4c74ab4a6066f38b6144bc6461f9447b Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Thu, 28 Jun 2012 15:04:57 -0400
Subject: tracepoint: Use static_key_false(), since static_branch() is
 deprecated

Convert the last user of static_branch() -> static_key_false().

Signed-off-by: Jason Baron <jbaron@redhat.com>
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: a.p.zijlstra@chello.nl
Link: http://lkml.kernel.org/r/5fffcd40a6c063769badcdd74a7d90980500dbcb.1340909155.git.jbaron@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tracepoint.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index bd96ecd0e05c..802de56c41e8 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -153,7 +153,7 @@ static inline void tracepoint_synchronize_unregister(void)
 	}								\
 	static inline void trace_##name##_rcuidle(proto)		\
 	{								\
-		if (static_branch(&__tracepoint_##name.key))		\
+		if (static_key_false(&__tracepoint_##name.key))		\
 			__DO_TRACE(&__tracepoint_##name,		\
 				TP_PROTO(data_proto),			\
 				TP_ARGS(data_args),			\
-- 
cgit v1.2.3


From 47fbc518a4b5c9a949f7cab8b14a00d3549bf138 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Thu, 28 Jun 2012 15:05:02 -0400
Subject: jump label: Remove static_branch()

Remove the obsolete static_branch() interface, since the supported interface
is now static_key_false()/true() - which is used by all in-tree code.

See commit:

  c5905afb0e ("static keys: Introduce 'struct static_key', static_key_true()/false()
  and static_key_slow_[inc|dec]()").

Signed-off-by: Jason Baron <jbaron@redhat.com>
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/199332c47eef8005d5a5bf1018a80d25929a5746.1340909155.git.jbaron@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/jump_label.h | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index c513a40510f5..0976fc46d1e0 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -42,8 +42,7 @@
  * allowed.
  *
  * Not initializing the key (static data is initialized to 0s anyway) is the
- * same as using STATIC_KEY_INIT_FALSE and static_key_false() is
- * equivalent with static_branch().
+ * same as using STATIC_KEY_INIT_FALSE.
  *
 */
 
@@ -107,12 +106,6 @@ static __always_inline bool static_key_true(struct static_key *key)
 	return !static_key_false(key);
 }
 
-/* Deprecated. Please use 'static_key_false() instead. */
-static __always_inline bool static_branch(struct static_key *key)
-{
-	return arch_static_branch(key);
-}
-
 extern struct jump_entry __start___jump_table[];
 extern struct jump_entry __stop___jump_table[];
 
@@ -166,14 +159,6 @@ static __always_inline bool static_key_true(struct static_key *key)
 	return false;
 }
 
-/* Deprecated. Please use 'static_key_false() instead. */
-static __always_inline bool static_branch(struct static_key *key)
-{
-	if (unlikely(atomic_read(&key->enabled)) > 0)
-		return true;
-	return false;
-}
-
 static inline void static_key_slow_inc(struct static_key *key)
 {
 	atomic_inc(&key->enabled);
-- 
cgit v1.2.3


From b39f25a849d7677a7dbf183f2483fd41c201a5ce Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 25 Jun 2012 13:38:27 -0700
Subject: x86/apic: Optimize cpu traversal in __assign_irq_vector() using
 domain membership

Currently __assign_irq_vector() goes through each cpu in the
specified mask until it finds a free vector in all the cpu's
that are part of the same interrupt domain. We visit all the
interrupt domain sibling cpus to reserve the free vector. So,
when we fail to find a free vector in an interrupt domain, it is
safe to continue our search with a cpu belonging to a new
interrupt domain. No need to go through each cpu, if the domain
containing that cpu is already visited.

Use the irq_cfg's old_domain to track the visited domains and
optimize the cpu traversal while finding a free vector in the
given cpumask.

NOTE: We can also optimize the search by using for_each_cpu() and
skip the current cpu, if it is not the first cpu in the mask
returned by the vector_allocation_domain(). But re-using the
cfg->old_domain to track the visited domains will be slightly
faster.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Alexander Gordeev <agordeev@redhat.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Link: http://lkml.kernel.org/r/1340656709-11423-2-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/apic.h           |  8 +++-----
 arch/x86/kernel/apic/apic_noop.c      |  3 +--
 arch/x86/kernel/apic/io_apic.c        | 15 ++++++++-------
 arch/x86/kernel/apic/x2apic_cluster.c |  3 +--
 arch/x86/kernel/vsmp_64.c             |  3 +--
 5 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index eec240e12091..8bebeb8952fb 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -306,7 +306,7 @@ struct apic {
 	unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
 	unsigned long (*check_apicid_present)(int apicid);
 
-	bool (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
+	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
 	void (*init_apic_ldr)(void);
 
 	void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
@@ -614,7 +614,7 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			       const struct cpumask *andmask,
 			       unsigned int *apicid);
 
-static inline bool
+static inline void
 flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
@@ -627,14 +627,12 @@ flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 	 */
 	cpumask_clear(retmask);
 	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-	return false;
 }
 
-static inline bool
+static inline void
 default_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	cpumask_copy(retmask, cpumask_of(cpu));
-	return true;
 }
 
 static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 65c07fc630a1..08c337bc49ff 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,12 +100,11 @@ static unsigned long noop_check_apicid_present(int bit)
 	return physid_isset(bit, phys_cpu_present_map);
 }
 
-static bool noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	if (cpu != 0)
 		pr_warning("APIC: Vector allocated for non-BSP cpu\n");
 	cpumask_copy(retmask, cpumask_of(cpu));
-	return true;
 }
 
 static u32 noop_apic_read(u32 reg)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index a951ef7decb1..8a08f09aa505 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1134,12 +1134,13 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 
 	/* Only try and allocate irqs on cpus that are present */
 	err = -ENOSPC;
-	for_each_cpu_and(cpu, mask, cpu_online_mask) {
+	cpumask_clear(cfg->old_domain);
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	while (cpu < nr_cpu_ids) {
 		int new_cpu;
 		int vector, offset;
-		bool more_domains;
 
-		more_domains = apic->vector_allocation_domain(cpu, tmp_mask);
+		apic->vector_allocation_domain(cpu, tmp_mask);
 
 		if (cpumask_subset(tmp_mask, cfg->domain)) {
 			free_cpumask_var(tmp_mask);
@@ -1156,10 +1157,10 @@ next:
 		}
 
 		if (unlikely(current_vector == vector)) {
-			if (more_domains)
-				continue;
-			else
-				break;
+			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
+			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
+			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
+			continue;
 		}
 
 		if (test_bit(vector, used_vectors))
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 943d03fc6fc4..b5d889b5659a 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -212,11 +212,10 @@ static int x2apic_cluster_probe(void)
 /*
  * Each x2apic cluster is an allocation domain.
  */
-static bool cluster_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	cpumask_clear(retmask);
 	cpumask_copy(retmask, per_cpu(cpus_in_cluster, cpu));
-	return true;
 }
 
 static struct apic apic_x2apic_cluster = {
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index fa5adb7c228c..3f0285ac00fa 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -208,10 +208,9 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
  * In vSMP, all cpus should be capable of handling interrupts, regardless of
  * the APIC used.
  */
-static bool fill_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	cpumask_setall(retmask);
-	return false;
 }
 
 static void vsmp_apic_post_init(void)
-- 
cgit v1.2.3


From 1ac322d0b169c95ce34d55b3ed6d40ce1a5f3a02 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 25 Jun 2012 13:38:28 -0700
Subject: x86/apic/x2apic: Limit the vector reservation to the user specified
 mask

For the x2apic cluster mode, vector for an interrupt is
currently reserved on all the cpu's that are part of the x2apic
cluster. But the interrupts will be routed only to the cluster
(derived from the first cpu in the mask) members specified in
the mask. So there is no need to reserve the vector in the
unused cluster members.

Modify __assign_irq_vector() to reserve the vectors based on the
user specified irq destination mask. If the new mask is a proper
subset of the currently used mask, cleanup the vector allocation
on the unused cpu members.

Also, allow the apic driver to tune the vector domain based on
the affinity mask (which in most cases is the user-specified
mask).

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Alexander Gordeev <agordeev@redhat.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Link: http://lkml.kernel.org/r/1340656709-11423-3-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/apic.h           |  9 ++++++---
 arch/x86/kernel/apic/apic_noop.c      |  3 ++-
 arch/x86/kernel/apic/io_apic.c        | 31 +++++++++++++++----------------
 arch/x86/kernel/apic/x2apic_cluster.c |  6 +++---
 arch/x86/kernel/vsmp_64.c             |  3 ++-
 5 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 8bebeb8952fb..88093c1d44fd 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -306,7 +306,8 @@ struct apic {
 	unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
 	unsigned long (*check_apicid_present)(int apicid);
 
-	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
+	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
+					 const struct cpumask *mask);
 	void (*init_apic_ldr)(void);
 
 	void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
@@ -615,7 +616,8 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			       unsigned int *apicid);
 
 static inline void
-flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
+flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
+			      const struct cpumask *mask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -630,7 +632,8 @@ flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 }
 
 static inline void
-default_vector_allocation_domain(int cpu, struct cpumask *retmask)
+default_vector_allocation_domain(int cpu, struct cpumask *retmask,
+				 const struct cpumask *mask)
 {
 	cpumask_copy(retmask, cpumask_of(cpu));
 }
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 08c337bc49ff..e145f28b4099 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,7 +100,8 @@ static unsigned long noop_check_apicid_present(int bit)
 	return physid_isset(bit, phys_cpu_present_map);
 }
 
-static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
+					  const struct cpumask *mask)
 {
 	if (cpu != 0)
 		pr_warning("APIC: Vector allocated for non-BSP cpu\n");
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8a08f09aa505..9684f963befe 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1113,7 +1113,6 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
 	static int current_offset = VECTOR_OFFSET_START % 16;
-	unsigned int old_vector;
 	int cpu, err;
 	cpumask_var_t tmp_mask;
 
@@ -1123,28 +1122,28 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return -ENOMEM;
 
-	old_vector = cfg->vector;
-	if (old_vector) {
-		cpumask_and(tmp_mask, mask, cpu_online_mask);
-		if (cpumask_subset(tmp_mask, cfg->domain)) {
-			free_cpumask_var(tmp_mask);
-			return 0;
-		}
-	}
-
 	/* Only try and allocate irqs on cpus that are present */
 	err = -ENOSPC;
 	cpumask_clear(cfg->old_domain);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	while (cpu < nr_cpu_ids) {
-		int new_cpu;
-		int vector, offset;
+		int new_cpu, vector, offset;
 
-		apic->vector_allocation_domain(cpu, tmp_mask);
+		apic->vector_allocation_domain(cpu, tmp_mask, mask);
 
 		if (cpumask_subset(tmp_mask, cfg->domain)) {
-			free_cpumask_var(tmp_mask);
-			return 0;
+			err = 0;
+			if (cpumask_equal(tmp_mask, cfg->domain))
+				break;
+			/*
+			 * New cpumask using the vector is a proper subset of
+			 * the current in use mask. So cleanup the vector
+			 * allocation for the members that are not used anymore.
+			 */
+			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
+			cfg->move_in_progress = 1;
+			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
+			break;
 		}
 
 		vector = current_vector;
@@ -1172,7 +1171,7 @@ next:
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
-		if (old_vector) {
+		if (cfg->vector) {
 			cfg->move_in_progress = 1;
 			cpumask_copy(cfg->old_domain, cfg->domain);
 		}
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index b5d889b5659a..bde78d0098a4 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -212,10 +212,10 @@ static int x2apic_cluster_probe(void)
 /*
  * Each x2apic cluster is an allocation domain.
  */
-static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
+					     const struct cpumask *mask)
 {
-	cpumask_clear(retmask);
-	cpumask_copy(retmask, per_cpu(cpus_in_cluster, cpu));
+	cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
 }
 
 static struct apic apic_x2apic_cluster = {
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 3f0285ac00fa..992f890283e9 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -208,7 +208,8 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
  * In vSMP, all cpus should be capable of handling interrupts, regardless of
  * the APIC used.
  */
-static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask,
+					  const struct cpumask *mask)
 {
 	cpumask_setall(retmask);
 }
-- 
cgit v1.2.3


From d872818dbbeed1bccf58c7f8c7db432154c802f9 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 25 Jun 2012 13:38:29 -0700
Subject: x86/apic/x2apic: Use multiple cluster members for the irq destination
 only with the explicit affinity

During boot or driver load etc, interrupt destination is setup
using default target cpu's. Later the user (irqbalance etc) or
the driver (irq_set_affinity/ irq_set_affinity_hint) can request
the interrupt to be migrated to some specific set of cpu's.

In the x2apic cluster routing, for the default scenario use
single cpu as the interrupt destination and when there is an
explicit interrupt affinity request, route the interrupt to
multiple members of a x2apic cluster specified in the cpumask of
the migration request.

This will minmize the vector pressure when there are lot of
interrupt sources and relatively few x2apic clusters (for
example a single socket server). This will allow the performance
critical interrupts to be routed to multiple cpu's in the x2apic
cluster (irqbalance for example uses the cache siblings etc
while specifying the interrupt destination) and allow
non-critical interrupts to be serviced by a single logical cpu.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Alexander Gordeev <agordeev@redhat.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Link: http://lkml.kernel.org/r/1340656709-11423-4-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/x2apic_cluster.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index bde78d0098a4..c88baa4ff0e5 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -209,13 +209,30 @@ static int x2apic_cluster_probe(void)
 		return 0;
 }
 
+static const struct cpumask *x2apic_cluster_target_cpus(void)
+{
+	return cpu_all_mask;
+}
+
 /*
  * Each x2apic cluster is an allocation domain.
  */
 static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
 					     const struct cpumask *mask)
 {
-	cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
+	/*
+	 * To minimize vector pressure, default case of boot, device bringup
+	 * etc will use a single cpu for the interrupt destination.
+	 *
+	 * On explicit migration requests coming from irqbalance etc,
+	 * interrupts will be routed to the x2apic cluster (cluster-id
+	 * derived from the first cpu in the mask) members specified
+	 * in the mask.
+	 */
+	if (mask == x2apic_cluster_target_cpus())
+		cpumask_copy(retmask, cpumask_of(cpu));
+	else
+		cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
 }
 
 static struct apic apic_x2apic_cluster = {
@@ -229,7 +246,7 @@ static struct apic apic_x2apic_cluster = {
 	.irq_delivery_mode		= dest_LowestPrio,
 	.irq_dest_mode			= 1, /* logical */
 
-	.target_cpus			= online_target_cpus,
+	.target_cpus			= x2apic_cluster_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
-- 
cgit v1.2.3


From 6a6dccba2fdc2a69f1f36b8f1c0acc8598e7221b Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Thu, 5 Jul 2012 15:52:23 +0530
Subject: mm: cma: don't replace lowmem pages with highmem

The filesystem layer expects pages in the block device's mapping to not
be in highmem (the mapping's gfp mask is set in bdget()), but CMA can
currently replace lowmem pages with highmem pages, leading to crashes in
filesystem code such as the one below:

  Unable to handle kernel NULL pointer dereference at virtual address 00000400
  pgd = c0c98000
  [00000400] *pgd=00c91831, *pte=00000000, *ppte=00000000
  Internal error: Oops: 817 [#1] PREEMPT SMP ARM
  CPU: 0    Not tainted  (3.5.0-rc5+ #80)
  PC is at __memzero+0x24/0x80
  ...
  Process fsstress (pid: 323, stack limit = 0xc0cbc2f0)
  Backtrace:
  [<c010e3f0>] (ext4_getblk+0x0/0x180) from [<c010e58c>] (ext4_bread+0x1c/0x98)
  [<c010e570>] (ext4_bread+0x0/0x98) from [<c0117944>] (ext4_mkdir+0x160/0x3bc)
   r4:c15337f0
  [<c01177e4>] (ext4_mkdir+0x0/0x3bc) from [<c00c29e0>] (vfs_mkdir+0x8c/0x98)
  [<c00c2954>] (vfs_mkdir+0x0/0x98) from [<c00c2a60>] (sys_mkdirat+0x74/0xac)
   r6:00000000 r5:c152eb40 r4:000001ff r3:c14b43f0
  [<c00c29ec>] (sys_mkdirat+0x0/0xac) from [<c00c2ab8>] (sys_mkdir+0x20/0x24)
   r6:beccdcf0 r5:00074000 r4:beccdbbc
  [<c00c2a98>] (sys_mkdir+0x0/0x24) from [<c000e3c0>] (ret_fast_syscall+0x0/0x30)

Fix this by replacing only highmem pages with highmem.

Reported-by: Laura Abbott <lauraa@codeaurora.org>
Signed-off-by: Rabin Vincent <rabin@rab.in>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 mm/page_alloc.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 44030096da63..4a4f9219683f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5635,7 +5635,12 @@ static struct page *
 __alloc_contig_migrate_alloc(struct page *page, unsigned long private,
 			     int **resultp)
 {
-	return alloc_page(GFP_HIGHUSER_MOVABLE);
+	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
+
+	if (PageHighMem(page))
+		gfp_mask |= __GFP_HIGHMEM;
+
+	return alloc_page(gfp_mask);
 }
 
 /* [start, end) must belong to a single zone. */
-- 
cgit v1.2.3


From cc2caea5b6152b8ce66dc2bbe83dc72b60612da8 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 6 Jul 2012 12:02:04 +0200
Subject: mm: cma: fix condition check when setting global cma area

dev_set_cma_area incorrectly assigned cma to global area on first call
due to incorrect check. This patch fixes this issue.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 include/asm-generic/dma-contiguous.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h
index c544356b374b..294b1e755ab2 100644
--- a/include/asm-generic/dma-contiguous.h
+++ b/include/asm-generic/dma-contiguous.h
@@ -18,7 +18,7 @@ static inline void dev_set_cma_area(struct device *dev, struct cma *cma)
 {
 	if (dev)
 		dev->cma_area = cma;
-	if (!dev || !dma_contiguous_default_area)
+	if (!dev && !dma_contiguous_default_area)
 		dma_contiguous_default_area = cma;
 }
 
-- 
cgit v1.2.3


From c3b7cdf180090d2686239a75bb0ae408108ed749 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@kernel.org>
Date: Fri, 6 Jul 2012 12:59:46 +0300
Subject: perf/x86: Fix intel_perfmon_event_mapformatting

Use tabs for "intel_perfmon_event_map" formatting in
perf_event_intel.c.

Signed-off-by: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/1341568786-7045-1-git-send-email-penberg@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 5fdedb4bc3f1..1f4c8add6759 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -21,14 +21,14 @@
  */
 static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 {
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
-  [PERF_COUNT_HW_REF_CPU_CYCLES]	= 0x0300, /* pseudo-encoding */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x0300, /* pseudo-encoding */
 };
 
 static struct event_constraint intel_core_event_constraints[] __read_mostly =
-- 
cgit v1.2.3


From cfca927972e31a5b3da49bf641c525732ff3c357 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Mon, 25 Jun 2012 12:54:17 -0700
Subject: rcu: Introduce check for callback list/count mismatch

The recent bug that introduced the RCU callback list/count mismatch
showed the need for a diagnostic to check for this, which this commit
adds.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutree.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 95c7b61e77e4..4154c9567a6d 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1612,6 +1612,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 		rdp->n_force_qs_snap = rsp->n_force_qs;
 	} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
 		rdp->qlen_last_fqs_check = rdp->qlen;
+	WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
 
 	local_irq_restore(flags);
 
-- 
cgit v1.2.3


From c701d5d9b384ff03ceb232ef21236364d784a411 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 28 Jun 2012 08:08:25 -0700
Subject: rcu: Fix code-style issues involving "else"

The Linux kernel coding style says that single-statement blocks should
omit curly braces unless the other leg of the "if" statement has
multiple statements, in which case the curly braces should be included.
This commit fixes RCU's violations of this rule.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 kernel/rcutiny_plugin.h |  7 ++++---
 kernel/rcutorture.c     |  3 ++-
 kernel/rcutree.c        |  7 ++++---
 kernel/rcutree_plugin.h | 14 ++++++++------
 4 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 116725b5edfb..918fd1e8509c 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -350,8 +350,9 @@ static int rcu_initiate_boost(void)
 			rcu_preempt_ctrlblk.boost_tasks =
 				rcu_preempt_ctrlblk.gp_tasks;
 		invoke_rcu_callbacks();
-	} else
+	} else {
 		RCU_TRACE(rcu_initiate_boost_trace());
+	}
 	return 1;
 }
 
@@ -778,9 +779,9 @@ void synchronize_rcu_expedited(void)
 		rpcp->exp_tasks = NULL;
 
 	/* Wait for tail of ->blkd_tasks list to drain. */
-	if (!rcu_preempted_readers_exp())
+	if (!rcu_preempted_readers_exp()) {
 		local_irq_restore(flags);
-	else {
+	} else {
 		rcu_initiate_boost();
 		local_irq_restore(flags);
 		wait_event(sync_rcu_preempt_exp_wq,
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index c279ee920947..155fb129b641 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -408,8 +408,9 @@ rcu_torture_cb(struct rcu_head *p)
 	if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
 		rp->rtort_mbtest = 0;
 		rcu_torture_free(rp);
-	} else
+	} else {
 		cur_ops->deferred_free(rp);
+	}
 }
 
 static int rcu_no_completed(void)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 117218a43724..f280e542e3e9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -892,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
 		if (rnp->qsmask & rdp->grpmask) {
 			rdp->qs_pending = 1;
 			rdp->passed_quiesce = 0;
-		} else
+		} else {
 			rdp->qs_pending = 0;
+		}
 		zero_cpu_stall_ticks(rdp);
 	}
 }
@@ -2130,9 +2131,9 @@ void synchronize_sched_expedited(void)
 		put_online_cpus();
 
 		/* No joy, try again later.  Or just synchronize_sched(). */
-		if (trycount++ < 10)
+		if (trycount++ < 10) {
 			udelay(trycount * num_online_cpus());
-		else {
+		} else {
 			synchronize_sched();
 			return;
 		}
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index a9194d5606c4..7f3244c0df01 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -390,8 +390,9 @@ void rcu_read_unlock_special(struct task_struct *t)
 							 rnp->grphi,
 							 !!rnp->gp_tasks);
 			rcu_report_unblock_qs_rnp(rnp, flags);
-		} else
+		} else {
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		}
 
 #ifdef CONFIG_RCU_BOOST
 		/* Unboost if we were boosted. */
@@ -757,9 +758,9 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 	int must_wait = 0;
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
-	if (list_empty(&rnp->blkd_tasks))
+	if (list_empty(&rnp->blkd_tasks)) {
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	else {
+	} else {
 		rnp->exp_tasks = rnp->blkd_tasks.next;
 		rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
 		must_wait = 1;
@@ -803,9 +804,9 @@ void synchronize_rcu_expedited(void)
 	 * expedited grace period for us, just leave.
 	 */
 	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
-		if (trycount++ < 10)
+		if (trycount++ < 10) {
 			udelay(trycount * num_online_cpus());
-		else {
+		} else {
 			synchronize_rcu();
 			return;
 		}
@@ -2093,8 +2094,9 @@ static void rcu_prepare_for_idle(int cpu)
 	if (rcu_cpu_has_callbacks(cpu)) {
 		trace_rcu_prep_idle("More callbacks");
 		invoke_rcu_core();
-	} else
+	} else {
 		trace_rcu_prep_idle("Callbacks drained");
+	}
 }
 
 /*
-- 
cgit v1.2.3


From 5cf05ad758c30d17ff23c2be346b5de982bc2121 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 17 May 2012 15:12:45 -0700
Subject: rcu: Fix broken strings in RCU's source code.

Although the C language allows you to break strings across lines, doing
this makes it hard for people to find the Linux kernel code corresponding
to a given console message.  This commit therefore fixes broken strings
throughout RCU's source code.

Suggested-by: Josh Triplett <josh@joshtriplett.org>
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  3 +--
 kernel/rcutorture.c      | 33 ++++++++++++++-------------------
 kernel/rcutree_trace.c   | 25 ++++++++++++-------------
 lib/list_debug.c         |  6 ++----
 4 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index c2c0d86dd3ac..115ead2b5155 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -427,8 +427,7 @@ extern int rcu_my_thread_group_empty(void);
 static inline void rcu_preempt_sleep_check(void)
 {
 	rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
-			   "Illegal context switch in RCU read-side "
-			   "critical section");
+			   "Illegal context switch in RCU read-side critical section");
 }
 #else /* #ifdef CONFIG_PROVE_RCU */
 static inline void rcu_preempt_sleep_check(void)
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 155fb129b641..25b15033c61f 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -49,8 +49,7 @@
 #include <asm/byteorder.h>
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
-	      "Josh Triplett <josh@freedesktop.org>");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");
 
 static int nreaders = -1;	/* # reader threads, defaults to 2*ncpus */
 static int nfakewriters = 4;	/* # fake writer threads */
@@ -1200,27 +1199,27 @@ rcu_torture_printk(char *page)
 	}
 	cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
 	cnt += sprintf(&page[cnt],
-		       "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
-		       "rtmbe: %d rtbke: %ld rtbre: %ld "
-		       "rtbf: %ld rtb: %ld nt: %ld "
-		       "onoff: %ld/%ld:%ld/%ld "
-		       "barrier: %ld/%ld:%ld",
+		       "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
 		       rcu_torture_current,
 		       rcu_torture_current_version,
 		       list_empty(&rcu_torture_freelist),
 		       atomic_read(&n_rcu_torture_alloc),
 		       atomic_read(&n_rcu_torture_alloc_fail),
-		       atomic_read(&n_rcu_torture_free),
+		       atomic_read(&n_rcu_torture_free));
+	cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ",
 		       atomic_read(&n_rcu_torture_mberror),
 		       n_rcu_torture_boost_ktrerror,
-		       n_rcu_torture_boost_rterror,
+		       n_rcu_torture_boost_rterror);
+	cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ",
 		       n_rcu_torture_boost_failure,
 		       n_rcu_torture_boosts,
-		       n_rcu_torture_timers,
+		       n_rcu_torture_timers);
+	cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ",
 		       n_online_successes,
 		       n_online_attempts,
 		       n_offline_successes,
-		       n_offline_attempts,
+		       n_offline_attempts);
+	cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld",
 		       n_barrier_successes,
 		       n_barrier_attempts,
 		       n_rcu_torture_barrier_error);
@@ -1462,8 +1461,7 @@ rcu_torture_shutdown(void *arg)
 		delta = shutdown_time - jiffies_snap;
 		if (verbose)
 			printk(KERN_ALERT "%s" TORTURE_FLAG
-			       "rcu_torture_shutdown task: %lu "
-			       "jiffies remaining\n",
+			       "rcu_torture_shutdown task: %lu jiffies remaining\n",
 			       torture_type, delta);
 		schedule_timeout_interruptible(delta);
 		jiffies_snap = ACCESS_ONCE(jiffies);
@@ -1515,8 +1513,7 @@ rcu_torture_onoff(void *arg)
 			if (cpu_down(cpu) == 0) {
 				if (verbose)
 					printk(KERN_ALERT "%s" TORTURE_FLAG
-					       "rcu_torture_onoff task: "
-					       "offlined %d\n",
+					       "rcu_torture_onoff task: offlined %d\n",
 					       torture_type, cpu);
 				n_offline_successes++;
 			}
@@ -1529,8 +1526,7 @@ rcu_torture_onoff(void *arg)
 			if (cpu_up(cpu) == 0) {
 				if (verbose)
 					printk(KERN_ALERT "%s" TORTURE_FLAG
-					       "rcu_torture_onoff task: "
-					       "onlined %d\n",
+					       "rcu_torture_onoff task: onlined %d\n",
 					       torture_type, cpu);
 				n_online_successes++;
 			}
@@ -1952,8 +1948,7 @@ rcu_torture_init(void)
 		return -EINVAL;
 	}
 	if (cur_ops->fqs == NULL && fqs_duration != 0) {
-		printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero "
-				  "fqs_duration, fqs disabled.\n");
+		printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
 		fqs_duration = 0;
 	}
 	if (cur_ops->init)
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index a16ddbd6fdc4..abffb486e94e 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -218,8 +218,7 @@ static const struct file_operations rcudata_csv_fops = {
 
 static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
 {
-	seq_printf(m,  "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu "
-		   "j=%04x bt=%04x\n",
+	seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ",
 		   rnp->grplo, rnp->grphi,
 		   "T."[list_empty(&rnp->blkd_tasks)],
 		   "N."[!rnp->gp_tasks],
@@ -227,11 +226,11 @@ static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
 		   "B."[!rnp->boost_tasks],
 		   convert_kthread_status(rnp->boost_kthread_status),
 		   rnp->n_tasks_boosted, rnp->n_exp_boosts,
-		   rnp->n_normal_boosts,
+		   rnp->n_normal_boosts);
+	seq_printf(m, "j=%04x bt=%04x\n",
 		   (int)(jiffies & 0xffff),
 		   (int)(rnp->boost_time & 0xffff));
-	seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
-		   "     balk",
+	seq_printf(m, "    balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
 		   rnp->n_balk_blkd_tasks,
 		   rnp->n_balk_exp_gp_tasks,
 		   rnp->n_balk_boost_tasks,
@@ -287,11 +286,11 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 	struct rcu_node *rnp;
 
 	gpnum = rsp->gpnum;
-	seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x "
-		      "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
+	seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x ",
 		   rsp->name, rsp->completed, gpnum, rsp->fqs_state,
 		   (long)(rsp->jiffies_force_qs - jiffies),
-		   (int)(jiffies & 0xffff),
+		   (int)(jiffies & 0xffff));
+	seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
 		   rsp->n_force_qs, rsp->n_force_qs_ngp,
 		   rsp->n_force_qs - rsp->n_force_qs_ngp,
 		   rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
@@ -378,16 +377,16 @@ static const struct file_operations rcugp_fops = {
 
 static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
 {
-	seq_printf(m, "%3d%cnp=%ld "
-		   "qsp=%ld rpq=%ld cbr=%ld cng=%ld "
-		   "gpc=%ld gps=%ld nf=%ld nn=%ld\n",
+	seq_printf(m, "%3d%cnp=%ld ",
 		   rdp->cpu,
 		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
-		   rdp->n_rcu_pending,
+		   rdp->n_rcu_pending);
+	seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ",
 		   rdp->n_rp_qs_pending,
 		   rdp->n_rp_report_qs,
 		   rdp->n_rp_cb_ready,
-		   rdp->n_rp_cpu_needs_gp,
+		   rdp->n_rp_cpu_needs_gp);
+	seq_printf(m, "gpc=%ld gps=%ld nf=%ld nn=%ld\n",
 		   rdp->n_rp_gp_completed,
 		   rdp->n_rp_gp_started,
 		   rdp->n_rp_need_fqs,
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 23a5e031cd8b..c24c2f7e296f 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -87,12 +87,10 @@ void __list_add_rcu(struct list_head *new,
 		    struct list_head *prev, struct list_head *next)
 {
 	WARN(next->prev != prev,
-		"list_add_rcu corruption. next->prev should be "
-		"prev (%p), but was %p. (next=%p).\n",
+		"list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
 		prev, next->prev, next);
 	WARN(prev->next != next,
-		"list_add_rcu corruption. prev->next should be "
-		"next (%p), but was %p. (prev=%p).\n",
+		"list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
 		next, prev->next, prev);
 	new->next = next;
 	new->prev = prev;
-- 
cgit v1.2.3


From c6a4ebb9ae30ead7684bce623955f74b17df496d Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 6 Jul 2012 23:56:00 +0200
Subject: MIPS: Provide a symbol for the legacy performance counter interrupt.

Based on https://patchwork.linux-mips.org/patch/3576 - but this really
deserves its own patchset and the symbol should also be used :)

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/irq.h | 1 +
 arch/mips/kernel/traps.c    | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index fb698dc09bc9..78dbb8a86da2 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -136,6 +136,7 @@ extern void free_irqno(unsigned int irq);
  * IE7.  Since R2 their number has to be read from the c0_intctl register.
  */
 #define CP0_LEGACY_COMPARE_IRQ 7
+#define CP0_LEGACY_PERFCNT_IRQ 7
 
 extern int cp0_compare_irq;
 extern int cp0_compare_irq_shift;
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 2d0c2a277f52..f985b7292cd9 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1597,7 +1597,7 @@ void __cpuinit per_cpu_trap_init(bool is_boot_cpu)
 			cp0_perfcount_irq = -1;
 	} else {
 		cp0_compare_irq = CP0_LEGACY_COMPARE_IRQ;
-		cp0_compare_irq_shift = cp0_compare_irq;
+		cp0_compare_irq_shift = CP0_LEGACY_PERFCNT_IRQ;
 		cp0_perfcount_irq = -1;
 	}
 
-- 
cgit v1.2.3


From f0b77f2c0eed1e37c96b9a995d5a45e9eb4aaca8 Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <sjhill@mips.com>
Date: Fri, 6 Jul 2012 23:56:00 +0200
Subject: MIPS: Clean-up GIC and vectored interrupts.

This change adds macros for routing of GIC interrupts for EIC and
non-EIC hardware modes. Also added Malta GIC macros having to do
with performance and timer interrupts.

Signed-off-by: Steven J. Hill <sjhill@mips.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3576/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/gic.h                  | 15 ++++++++++++++-
 arch/mips/include/asm/mips-boards/maltaint.h | 10 ++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/gic.h b/arch/mips/include/asm/gic.h
index 86548da650e7..991b659e2548 100644
--- a/arch/mips/include/asm/gic.h
+++ b/arch/mips/include/asm/gic.h
@@ -206,7 +206,7 @@
 
 #define GIC_VPE_EIC_SHADOW_SET_BASE	0x0100
 #define GIC_VPE_EIC_SS(intr) \
-	(GIC_EIC_SHADOW_SET_BASE + (4 * intr))
+	(GIC_VPE_EIC_SHADOW_SET_BASE + (4 * intr))
 
 #define GIC_VPE_EIC_VEC_BASE		0x0800
 #define GIC_VPE_EIC_VEC(intr) \
@@ -330,6 +330,17 @@ struct gic_intr_map {
 #define GIC_FLAG_TRANSPARENT   0x02
 };
 
+/*
+ * This is only used in EIC mode. This helps to figure out which
+ * shared interrupts we need to process when we get a vector interrupt.
+ */
+#define GIC_MAX_SHARED_INTR  0x5
+struct gic_shared_intr_map {
+	unsigned int num_shared_intr;
+	unsigned int intr_list[GIC_MAX_SHARED_INTR];
+	unsigned int local_intr_mask;
+};
+
 extern void gic_init(unsigned long gic_base_addr,
 	unsigned long gic_addrspace_size, struct gic_intr_map *intrmap,
 	unsigned int intrmap_size, unsigned int irqbase);
@@ -338,5 +349,7 @@ extern unsigned int gic_get_int(void);
 extern void gic_send_ipi(unsigned int intr);
 extern unsigned int plat_ipi_call_int_xlate(unsigned int);
 extern unsigned int plat_ipi_resched_int_xlate(unsigned int);
+extern void gic_bind_eic_interrupt(int irq, int set);
+extern unsigned int gic_get_timer_pending(void);
 
 #endif /* _ASM_GICREGS_H */
diff --git a/arch/mips/include/asm/mips-boards/maltaint.h b/arch/mips/include/asm/mips-boards/maltaint.h
index d11aa02a956a..5447d9fc4219 100644
--- a/arch/mips/include/asm/mips-boards/maltaint.h
+++ b/arch/mips/include/asm/mips-boards/maltaint.h
@@ -86,6 +86,16 @@
 #define GIC_CPU_INT4		4 /* .			*/
 #define GIC_CPU_INT5		5 /* Core Interrupt 5   */
 
+/* MALTA GIC local interrupts */
+#define GIC_INT_TMR             (GIC_CPU_INT5)
+#define GIC_INT_PERFCTR         (GIC_CPU_INT5)
+
+/* GIC constants */
+/* Add 2 to convert non-eic hw int # to eic vector # */
+#define GIC_CPU_TO_VEC_OFFSET   (2)
+/* If we map an intr to pin X, GIC will actually generate vector X+1 */
+#define GIC_PIN_TO_VEC_OFFSET   (1)
+
 #define GIC_EXT_INTR(x)		x
 
 /* External Interrupts used for IPI */
-- 
cgit v1.2.3


From 839efb4ffbfba74857e3411413f7ca53c8ff1925 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 6 Jul 2012 23:56:00 +0200
Subject: MIPS: MT: Fix indentation damage.

Split off from https://patchwork.linux-mips.org/patch/3603/.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mipsmtregs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h
index c9420aa97e32..e71ff4c317f2 100644
--- a/arch/mips/include/asm/mipsmtregs.h
+++ b/arch/mips/include/asm/mipsmtregs.h
@@ -48,7 +48,7 @@
 #define CP0_VPECONF0		$1, 2
 #define CP0_VPECONF1		$1, 3
 #define CP0_YQMASK		$1, 4
-#define CP0_VPESCHEDULE	$1, 5
+#define CP0_VPESCHEDULE		$1, 5
 #define CP0_VPESCHEFBK		$1, 6
 #define CP0_TCSTATUS		$2, 1
 #define CP0_TCBIND		$2, 2
-- 
cgit v1.2.3


From 113c62d9844d9037508fa156e47db1b5407a27c3 Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <sjhill@mips.com>
Date: Fri, 6 Jul 2012 23:56:00 +0200
Subject: MIPS: Add support for the M14Kc core.

[ralf@linux-mips.org: Fixed whitespace damage.]

Signed-off-by: Steven J. Hill <sjhill@mips.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3773/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cpu.h          | 5 +++--
 arch/mips/kernel/cpu-probe.c         | 7 ++++++-
 arch/mips/mm/c-r4k.c                 | 1 +
 arch/mips/mm/tlbex.c                 | 2 ++
 arch/mips/oprofile/common.c          | 1 +
 arch/mips/oprofile/op_model_mipsxx.c | 4 ++++
 6 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index f9fa2a479dd0..c64910586b74 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -94,6 +94,7 @@
 #define PRID_IMP_24KE		0x9600
 #define PRID_IMP_74K		0x9700
 #define PRID_IMP_1004K		0x9900
+#define PRID_IMP_M14KC		0x9c00
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_SIBYTE
@@ -260,7 +261,7 @@ enum cpu_type_enum {
 	 */
 	CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K,
 	CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350,
-	CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC,
+	CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC, CPU_M14KC,
 
 	/*
 	 * MIPS64 class processors
@@ -288,7 +289,7 @@ enum cpu_type_enum {
 #define MIPS_CPU_ISA_M64R2	0x00000100
 
 #define MIPS_CPU_ISA_32BIT (MIPS_CPU_ISA_I | MIPS_CPU_ISA_II | \
-	MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 )
+	MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2)
 #define MIPS_CPU_ISA_64BIT (MIPS_CPU_ISA_III | MIPS_CPU_ISA_IV | \
 	MIPS_CPU_ISA_V | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)
 
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 6ae7ce4ac63e..aaf39f3eaa51 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -4,7 +4,7 @@
  * Copyright (C) xxxx  the Anonymous
  * Copyright (C) 1994 - 2006 Ralf Baechle
  * Copyright (C) 2003, 2004  Maciej W. Rozycki
- * Copyright (C) 2001, 2004  MIPS Inc.
+ * Copyright (C) 2001, 2004, 2011, 2012  MIPS Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -199,6 +199,7 @@ void __init check_wait(void)
 		cpu_wait = rm7k_wait_irqoff;
 		break;
 
+	case CPU_M14KC:
 	case CPU_24K:
 	case CPU_34K:
 	case CPU_1004K:
@@ -831,6 +832,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_74K;
 		__cpu_name[cpu] = "MIPS 74Kc";
 		break;
+	case PRID_IMP_M14KC:
+		c->cputype = CPU_M14KC;
+		__cpu_name[cpu] = "MIPS M14Kc";
+		break;
 	case PRID_IMP_1004K:
 		c->cputype = CPU_1004K;
 		__cpu_name[cpu] = "MIPS 1004Kc";
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 5109be96d98d..e56efd059189 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1051,6 +1051,7 @@ static void __cpuinit probe_pcache(void)
 	case CPU_R14000:
 		break;
 
+	case CPU_M14KC:
 	case CPU_24K:
 	case CPU_34K:
 	case CPU_74K:
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 0bc485b3cd60..03eb0ef91580 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -9,6 +9,7 @@
  * Copyright (C) 2005, 2007, 2008, 2009  Maciej W. Rozycki
  * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
  * Copyright (C) 2008, 2009 Cavium Networks, Inc.
+ * Copyright (C) 2011  MIPS Technologies, Inc.
  *
  * ... and the days got worse and worse and now you see
  * I've gone completly out of my mind.
@@ -494,6 +495,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	case CPU_R14000:
 	case CPU_4KC:
 	case CPU_4KEC:
+	case CPU_M14KC:
 	case CPU_SB1:
 	case CPU_SB1A:
 	case CPU_4KSC:
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index d1f2d4c52d42..b6e378211a2c 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -78,6 +78,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 
 	switch (current_cpu_type()) {
 	case CPU_5KC:
+	case CPU_M14KC:
 	case CPU_20KC:
 	case CPU_24K:
 	case CPU_25KF:
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index baba3bcaa3c2..4d80a856048d 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -322,6 +322,10 @@ static int __init mipsxx_init(void)
 
 	op_model_mipsxx_ops.num_counters = counters;
 	switch (current_cpu_type()) {
+	case CPU_M14KC:
+		op_model_mipsxx_ops.cpu_type = "mips/M14Kc";
+		break;
+
 	case CPU_20KC:
 		op_model_mipsxx_ops.cpu_type = "mips/20K";
 		break;
-- 
cgit v1.2.3


From 3da947b2693993d5f6138f005d2625e9387c9618 Mon Sep 17 00:00:00 2001
From: Sasha Levin <levinsasha928@gmail.com>
Date: Mon, 2 Jul 2012 01:29:55 +0000
Subject: ieee802154: verify packet size before trying to allocate it

Currently when sending data over datagram, the send function will attempt to
allocate any size passed on from the userspace.

We should make sure that this size is checked and limited. We'll limit it
to the MTU of the device, which is checked later anyway.

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/dgram.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index 6fbb2ad7bb6d..16705611589a 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -230,6 +230,12 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
 	mtu = dev->mtu;
 	pr_debug("name = %s, mtu = %u\n", dev->name, mtu);
 
+	if (size > mtu) {
+		pr_debug("size = %Zu, mtu = %u\n", size, mtu);
+		err = -EINVAL;
+		goto out_dev;
+	}
+
 	hlen = LL_RESERVED_SPACE(dev);
 	tlen = dev->needed_tailroom;
 	skb = sock_alloc_send_skb(sk, hlen + tlen + size,
@@ -258,12 +264,6 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (err < 0)
 		goto out_skb;
 
-	if (size > mtu) {
-		pr_debug("size = %Zu, mtu = %u\n", size, mtu);
-		err = -EINVAL;
-		goto out_skb;
-	}
-
 	skb->dev = dev;
 	skb->sk  = sk;
 	skb->protocol = htons(ETH_P_IEEE802154);
-- 
cgit v1.2.3


From acfa9e94e2f06f8911a1d2f257880af4ad945eef Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 2 Jul 2012 08:36:12 +0000
Subject: net: dont use __netdev_alloc_skb for bounce buffer

commit a1c7fff7e1 (net: netdev_alloc_skb() use build_skb()) broke b44 on
some 64bit machines.

It appears b44 and b43 use __netdev_alloc_skb() instead of alloc_skb()
for their bounce buffers.

There is no need to add an extra NET_SKB_PAD reservation for bounce
buffers :

- In TX path, NET_SKB_PAD is useless

- In RX path in b44, we force a copy of incoming frames if
  GFP_DMA allocations were needed.

Reported-and-bisected-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/b44.c  | 4 ++--
 drivers/net/wireless/b43legacy/dma.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 46b8b7d81633..d09c6b583d17 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -656,7 +656,7 @@ static int b44_alloc_rx_skb(struct b44 *bp, int src_idx, u32 dest_idx_unmasked)
 			dma_unmap_single(bp->sdev->dma_dev, mapping,
 					     RX_PKT_BUF_SZ, DMA_FROM_DEVICE);
 		dev_kfree_skb_any(skb);
-		skb = __netdev_alloc_skb(bp->dev, RX_PKT_BUF_SZ, GFP_ATOMIC|GFP_DMA);
+		skb = alloc_skb(RX_PKT_BUF_SZ, GFP_ATOMIC | GFP_DMA);
 		if (skb == NULL)
 			return -ENOMEM;
 		mapping = dma_map_single(bp->sdev->dma_dev, skb->data,
@@ -967,7 +967,7 @@ static netdev_tx_t b44_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			dma_unmap_single(bp->sdev->dma_dev, mapping, len,
 					     DMA_TO_DEVICE);
 
-		bounce_skb = __netdev_alloc_skb(dev, len, GFP_ATOMIC | GFP_DMA);
+		bounce_skb = alloc_skb(len, GFP_ATOMIC | GFP_DMA);
 		if (!bounce_skb)
 			goto err_out;
 
diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c
index f1f8bd09bd87..c8baf020c20f 100644
--- a/drivers/net/wireless/b43legacy/dma.c
+++ b/drivers/net/wireless/b43legacy/dma.c
@@ -1072,7 +1072,7 @@ static int dma_tx_fragment(struct b43legacy_dmaring *ring,
 	meta->dmaaddr = map_descbuffer(ring, skb->data, skb->len, 1);
 	/* create a bounce buffer in zone_dma on mapping failure. */
 	if (b43legacy_dma_mapping_error(ring, meta->dmaaddr, skb->len, 1)) {
-		bounce_skb = __dev_alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
+		bounce_skb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
 		if (!bounce_skb) {
 			ring->current_slot = old_top_slot;
 			ring->used_slots = old_used_slots;
-- 
cgit v1.2.3


From b94e52f62683dc0b00c6d1b58b80929a078c0fd5 Mon Sep 17 00:00:00 2001
From: Cloud Ren <cjren@qca.qualcomm.com>
Date: Tue, 3 Jul 2012 16:51:48 +0000
Subject: atl1c: fix issue of transmit queue 0 timed out

some people report atl1c could cause system hang with following
kernel trace info:
---------------------------------------
WARNING: at.../net/sched/sch_generic.c:258 dev_watchdog+0x1db/0x1d0()
...
NETDEV WATCHDOG: eth0 (atl1c): transmit queue 0 timed out
...
---------------------------------------
This is caused by netif_stop_queue calling when cable Link is down.
So remove netif_stop_queue, because link_watch will take it over.

Signed-off-by: xiong <xiong@qca.qualcomm.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Cloud Ren <cjren@qca.qualcomm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 9cc15701101b..1f78b63d5efe 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -261,7 +261,6 @@ static void atl1c_check_link_status(struct atl1c_adapter *adapter)
 	if ((phy_data & BMSR_LSTATUS) == 0) {
 		/* link down */
 		netif_carrier_off(netdev);
-		netif_stop_queue(netdev);
 		hw->hibernate = true;
 		if (atl1c_reset_mac(hw) != 0)
 			if (netif_msg_hw(adapter))
-- 
cgit v1.2.3


From 960fb66e520a405dde39ff883f17ff2669c13d85 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 3 Jul 2012 20:55:21 +0000
Subject: netem: add limitation to reordered packets

Fix two netem bugs :

1) When a frame was dropped by tfifo_enqueue(), drop counter
   was incremented twice.

2) When reordering is triggered, we enqueue a packet without
   checking queue limit. This can OOM pretty fast when this
   is repeated enough, since skbs are orphaned, no socket limit
   can help in this situation.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Mark Gordon <msg@google.com>
Cc: Andreas Terzis <aterzis@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 42 +++++++++++++++---------------------------
 1 file changed, 15 insertions(+), 27 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a2a95aabf9c2..c412ad0d0308 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -331,29 +331,22 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
 	return PSCHED_NS2TICKS(ticks);
 }
 
-static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
+static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct sk_buff_head *list = &sch->q;
 	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
-	struct sk_buff *skb;
-
-	if (likely(skb_queue_len(list) < sch->limit)) {
-		skb = skb_peek_tail(list);
-		/* Optimize for add at tail */
-		if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
-			return qdisc_enqueue_tail(nskb, sch);
+	struct sk_buff *skb = skb_peek_tail(list);
 
-		skb_queue_reverse_walk(list, skb) {
-			if (tnext >= netem_skb_cb(skb)->time_to_send)
-				break;
-		}
+	/* Optimize for add at tail */
+	if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
+		return __skb_queue_tail(list, nskb);
 
-		__skb_queue_after(list, skb, nskb);
-		sch->qstats.backlog += qdisc_pkt_len(nskb);
-		return NET_XMIT_SUCCESS;
+	skb_queue_reverse_walk(list, skb) {
+		if (tnext >= netem_skb_cb(skb)->time_to_send)
+			break;
 	}
 
-	return qdisc_reshape_fail(nskb, sch);
+	__skb_queue_after(list, skb, nskb);
 }
 
 /*
@@ -368,7 +361,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	/* We don't fill cb now as skb_unshare() may invalidate it */
 	struct netem_skb_cb *cb;
 	struct sk_buff *skb2;
-	int ret;
 	int count = 1;
 
 	/* Random duplication */
@@ -419,6 +411,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
 	}
 
+	if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
+		return qdisc_reshape_fail(skb, sch);
+
+	sch->qstats.backlog += qdisc_pkt_len(skb);
+
 	cb = netem_skb_cb(skb);
 	if (q->gap == 0 ||		/* not doing reordering */
 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
@@ -450,7 +447,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 		cb->time_to_send = now + delay;
 		++q->counter;
-		ret = tfifo_enqueue(skb, sch);
+		tfifo_enqueue(skb, sch);
 	} else {
 		/*
 		 * Do re-ordering by putting one out of N packets at the front
@@ -460,16 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->counter = 0;
 
 		__skb_queue_head(&sch->q, skb);
-		sch->qstats.backlog += qdisc_pkt_len(skb);
 		sch->qstats.requeues++;
-		ret = NET_XMIT_SUCCESS;
-	}
-
-	if (ret != NET_XMIT_SUCCESS) {
-		if (net_xmit_drop_count(ret)) {
-			sch->qstats.drops++;
-			return ret;
-		}
 	}
 
 	return NET_XMIT_SUCCESS;
-- 
cgit v1.2.3


From d4e41649434cd6db2e69783130cba81886dac97f Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 4 Jul 2012 02:00:25 +0000
Subject: ixgbe: DCB and SR-IOV can not co-exist and will cause hangs

DCB and SR-IOV cannot currently be enabled at the same time as the queueing
schemes are incompatible.  If they are both enabled it will result in Tx
hangs since only the first Tx queue will be able to transmit any traffic.

This simple fix for this is to block us from enabling TCs in ixgbe_setup_tc
if SR-IOV is enabled.  This change will be reverted once we can support
SR-IOV and DCB coexistence.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 18ca3bcadf0c..e242104ab471 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6647,6 +6647,11 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
 		return -EINVAL;
 	}
 
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		e_err(drv, "Enable failed, SR-IOV enabled\n");
+		return -EINVAL;
+	}
+
 	/* Hardware supports up to 8 traffic classes */
 	if (tc > adapter->dcb_cfg.num_tcs.pg_tcs ||
 	    (hw->mac.type == ixgbe_mac_82598EB &&
-- 
cgit v1.2.3


From b93984c9afacd4fe32b785d52a93660d91202b10 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Wed, 4 Jul 2012 12:06:16 +0000
Subject: netdev/phy: Fixup lockdep warnings in mdio-mux.c

With lockdep enabled we get:

=============================================
[ INFO: possible recursive locking detected ]
3.4.4-Cavium-Octeon+ #313 Not tainted
---------------------------------------------
kworker/u:1/36 is trying to acquire lock:
(&bus->mdio_lock){+.+...}, at: [<ffffffff813da7e8>] mdio_mux_read+0x38/0xa0

but task is already holding lock:
 (&bus->mdio_lock){+.+...}, at: [<ffffffff813d79e4>] mdiobus_read+0x44/0x88

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&bus->mdio_lock);
  lock(&bus->mdio_lock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation
.
.
.

This is a false positive, since we are indeed using 'nested' locking,
we need to use mutex_lock_nested().

Now in theory we can stack multiple MDIO multiplexers, but that would
require passing the nesting level (which is difficult to know) to
mutex_lock_nested().  Instead we assume the simple case of a single
level of nesting.  Since these are only warning messages, it isn't so
important to solve the general case.

Signed-off-by: David Daney <david.daney@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-mux.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index 39ea0674dcde..5c120189ec86 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -46,7 +46,13 @@ static int mdio_mux_read(struct mii_bus *bus, int phy_id, int regnum)
 	struct mdio_mux_parent_bus *pb = cb->parent;
 	int r;
 
-	mutex_lock(&pb->mii_bus->mdio_lock);
+	/* In theory multiple mdio_mux could be stacked, thus creating
+	 * more than a single level of nesting.  But in practice,
+	 * SINGLE_DEPTH_NESTING will cover the vast majority of use
+	 * cases.  We use it, instead of trying to handle the general
+	 * case.
+	 */
+	mutex_lock_nested(&pb->mii_bus->mdio_lock, SINGLE_DEPTH_NESTING);
 	r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data);
 	if (r)
 		goto out;
@@ -71,7 +77,7 @@ static int mdio_mux_write(struct mii_bus *bus, int phy_id,
 
 	int r;
 
-	mutex_lock(&pb->mii_bus->mdio_lock);
+	mutex_lock_nested(&pb->mii_bus->mdio_lock, SINGLE_DEPTH_NESTING);
 	r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data);
 	if (r)
 		goto out;
-- 
cgit v1.2.3


From b761c9b1f4f69eb53fb6147547a1ab25237a93b3 Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Wed, 4 Jul 2012 23:28:40 +0000
Subject: cgroup: fix panic in netprio_cgroup

we set max_prioidx to the first zero bit index of prioidx_map in
function get_prioidx.

So when we delete the low index netprio cgroup and adding a new
netprio cgroup again,the max_prioidx will be set to the low index.

when we set the high index cgroup's net_prio.ifpriomap,the function
write_priomap will call update_netdev_tables to alloc memory which
size is sizeof(struct netprio_map) + sizeof(u32) * (max_prioidx + 1),
so the size of array that map->priomap point to is max_prioidx +1,
which is low than what we actually need.

fix this by adding check in get_prioidx,only set max_prioidx when
max_prioidx low than the new prioidx.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netprio_cgroup.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 5b8aa2fae48b..aa907ed466ea 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -49,8 +49,9 @@ static int get_prioidx(u32 *prio)
 		return -ENOSPC;
 	}
 	set_bit(prioidx, prioidx_map);
+	if (atomic_read(&max_prioidx) < prioidx)
+		atomic_set(&max_prioidx, prioidx);
 	spin_unlock_irqrestore(&prioidx_map_lock, flags);
-	atomic_set(&max_prioidx, prioidx);
 	*prio = prioidx;
 	return 0;
 }
-- 
cgit v1.2.3


From 6fecd35d4cd79fc75e8290abb86734c18500d2a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Thu, 5 Jul 2012 01:13:33 +0000
Subject: net: qmi_wwan: add ZTE MF60
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adding a device with limited QMI support. It does not support
normal QMI_WDS commands for connection management. Instead,
sending a QMI_CTL SET_INSTANCE_ID command is required to
enable the network interface:

  01 0f 00 00 00 00 00 00  20 00 04 00 01 01 00 00

A number of QMI_DMS and QMI_NAS commands are also supported
for optional device management.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index b01960fcfbc9..a051cedd64bd 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -346,6 +346,15 @@ static const struct driver_info	qmi_wwan_force_int1 = {
 	.data		= BIT(1), /* interface whitelist bitmap */
 };
 
+static const struct driver_info qmi_wwan_force_int2 = {
+	.description	= "Qualcomm WWAN/QMI device",
+	.flags		= FLAG_WWAN,
+	.bind		= qmi_wwan_bind_shared,
+	.unbind		= qmi_wwan_unbind_shared,
+	.manage_power	= qmi_wwan_manage_power,
+	.data		= BIT(2), /* interface whitelist bitmap */
+};
+
 static const struct driver_info	qmi_wwan_force_int3 = {
 	.description	= "Qualcomm WWAN/QMI device",
 	.flags		= FLAG_WWAN,
@@ -498,6 +507,15 @@ static const struct usb_device_id products[] = {
 		.bInterfaceProtocol = 0xff,
 		.driver_info        = (unsigned long)&qmi_wwan_force_int4,
 	},
+	{	/* ZTE MF60 */
+		.match_flags	    = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO,
+		.idVendor           = 0x19d2,
+		.idProduct          = 0x1402,
+		.bInterfaceClass    = 0xff,
+		.bInterfaceSubClass = 0xff,
+		.bInterfaceProtocol = 0xff,
+		.driver_info        = (unsigned long)&qmi_wwan_force_int2,
+	},
 	{	/* Sierra Wireless MC77xx in QMI mode */
 		.match_flags	    = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO,
 		.idVendor           = 0x1199,
-- 
cgit v1.2.3


From 054581e6c1eb314d54d4747fba545e9802be29da Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Thu, 5 Jul 2012 14:21:55 +0000
Subject: cnic: Don't use netdev->base_addr

commit c0357e975afdbbedab5c662d19bef865f02adc17
    bnx2: stop using net_device.{base_addr, irq}.

removed netdev->base_addr so we need to update cnic to get the MMIO
base address from pci_resource_start().  Otherwise, mmap of the uio
device will fail.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/cnic.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index c95e7b5e2b85..3c95065e0def 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -1053,12 +1053,13 @@ static int cnic_init_uio(struct cnic_dev *dev)
 
 	uinfo = &udev->cnic_uinfo;
 
-	uinfo->mem[0].addr = dev->netdev->base_addr;
+	uinfo->mem[0].addr = pci_resource_start(dev->pcidev, 0);
 	uinfo->mem[0].internal_addr = dev->regview;
-	uinfo->mem[0].size = dev->netdev->mem_end - dev->netdev->mem_start;
 	uinfo->mem[0].memtype = UIO_MEM_PHYS;
 
 	if (test_bit(CNIC_F_BNX2_CLASS, &dev->flags)) {
+		uinfo->mem[0].size = MB_GET_CID_ADDR(TX_TSS_CID +
+						     TX_MAX_TSS_RINGS + 1);
 		uinfo->mem[1].addr = (unsigned long) cp->status_blk.gen &
 					PAGE_MASK;
 		if (cp->ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX)
@@ -1068,6 +1069,8 @@ static int cnic_init_uio(struct cnic_dev *dev)
 
 		uinfo->name = "bnx2_cnic";
 	} else if (test_bit(CNIC_F_BNX2X_CLASS, &dev->flags)) {
+		uinfo->mem[0].size = pci_resource_len(dev->pcidev, 0);
+
 		uinfo->mem[1].addr = (unsigned long) cp->bnx2x_def_status_blk &
 			PAGE_MASK;
 		uinfo->mem[1].size = sizeof(*cp->bnx2x_def_status_blk);
-- 
cgit v1.2.3


From a73f89a61f92b364f0b4a3be412b5b70553afc23 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 29 Jun 2012 09:42:28 +0000
Subject: netfilter: ipset: timeout fixing bug broke SET target special timeout
 value

The patch "127f559 netfilter: ipset: fix timeout value overflow bug"
broke the SET target when no timeout was specified.

Reported-by: Jean-Philippe Menil <jean-philippe.menil@univ-nantes.fr>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_set.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 035960ec5cb9..c6f7db720d84 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -16,6 +16,7 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_set.h>
+#include <linux/netfilter/ipset/ip_set_timeout.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -310,7 +311,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 		info->del_set.flags, 0, UINT_MAX);
 
 	/* Normalize to fit into jiffies */
-	if (add_opt.timeout > UINT_MAX/MSEC_PER_SEC)
+	if (add_opt.timeout != IPSET_NO_TIMEOUT &&
+	    add_opt.timeout > UINT_MAX/MSEC_PER_SEC)
 		add_opt.timeout = UINT_MAX/MSEC_PER_SEC;
 	if (info->add_set.index != IPSET_INVALID_ID)
 		ip_set_add(info->add_set.index, skb, par, &add_opt);
-- 
cgit v1.2.3


From 6bd0405bb4196b44f1acb7a58f11382cdaf6f7f0 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 5 Jul 2012 15:42:10 +0200
Subject: netfilter: nf_ct_ecache: fix crash with multiple containers, one
 shutting down

Hans reports that he's still hitting:

BUG: unable to handle kernel NULL pointer dereference at 000000000000027c
IP: [<ffffffff813615db>] netlink_has_listeners+0xb/0x60
PGD 0
Oops: 0000 [#3] PREEMPT SMP
CPU 0

It happens when adding a number of containers with do:

nfct_query(h, NFCT_Q_CREATE, ct);

and most likely one namespace shuts down.

this problem was supposed to be fixed by:
70e9942 netfilter: nf_conntrack: make event callback registration per-netns

Still, it was missing one rcu_access_pointer to check if the callback
is set or not.

Reported-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_ecache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index a88fb6939387..e1ce1048fe5f 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -78,7 +78,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *e;
 
-	if (net->ct.nf_conntrack_event_cb == NULL)
+	if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
 		return;
 
 	e = nf_ct_ecache_find(ct);
-- 
cgit v1.2.3


From 0e050923a797c1fc46ccc1e5182fd3090f33a75d Mon Sep 17 00:00:00 2001
From: Frank Kunz <xxxxxmichl@googlemail.com>
Date: Thu, 5 Jul 2012 22:32:49 +0200
Subject: HID: add Sennheiser BTD500USB device support

The Sennheiser BTD500USB composit device requires the
HID_QUIRK_NOGET flag to be set for working proper. Without the
flag the device crashes during hid intialization.

Signed-off-by: Frank Kunz <xxxxxmichl@googlemail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h           | 3 +++
 drivers/hid/usbhid/hid-quirks.c | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index d1cdd2d28409..fc0c68e4b9ed 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -653,6 +653,9 @@
 #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE	0x0001
 #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE	0x0600
 
+#define USB_VENDOR_ID_SENNHEISER	0x1395
+#define USB_DEVICE_ID_SENNHEISER_BTD500USB	0x002c
+
 #define USB_VENDOR_ID_SIGMA_MICRO	0x1c4f
 #define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD	0x0002
 
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 0597ee604f6e..903eef3d3e10 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -76,6 +76,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008, HID_QUIRK_NOGET },
+	{ USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_1, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_2, HID_QUIRK_NOGET },
-- 
cgit v1.2.3


From 472dd35ccbd999a6cb2994c318ca16edf65c4359 Mon Sep 17 00:00:00 2001
From: Thomas Huehn <thomas@net.t-labs.tu-berlin.de>
Date: Fri, 29 Jun 2012 06:26:27 -0700
Subject: mac80211: correct size the argument to kzalloc in minstrel_ht

msp has type struct minstrel_ht_sta_priv not struct minstrel_ht_sta.

(This incorporates the fixup originally posted as "mac80211: fix kzalloc
memory corruption introduced in minstrel_ht". -- JWL)

Reported-by: Fengguang Wu <wfg@linux.intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Thomas Huehn <thomas@net.t-labs.tu-berlin.de>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rc80211_minstrel_ht.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 2d1acc6c5445..f9e51ef8dfa2 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -809,7 +809,7 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
 			max_rates = sband->n_bitrates;
 	}
 
-	msp = kzalloc(sizeof(struct minstrel_ht_sta), gfp);
+	msp = kzalloc(sizeof(*msp), gfp);
 	if (!msp)
 		return NULL;
 
-- 
cgit v1.2.3


From 147f20e316f3949f3f5ffe6c8658e9fe1c6ceb23 Mon Sep 17 00:00:00 2001
From: Sasha Levin <levinsasha928@gmail.com>
Date: Sat, 30 Jun 2012 11:56:47 +0200
Subject: NFC: Prevent NULL deref when getting socket name

llcp_sock_getname can be called without a device attached to the nfc_llcp_sock.

This would lead to the following BUG:

[  362.341807] BUG: unable to handle kernel NULL pointer dereference at           (null)
[  362.341815] IP: [<ffffffff836258e5>] llcp_sock_getname+0x75/0xc0
[  362.341818] PGD 31b35067 PUD 30631067 PMD 0
[  362.341821] Oops: 0000 [#627] PREEMPT SMP DEBUG_PAGEALLOC
[  362.341826] CPU 3
[  362.341827] Pid: 7816, comm: trinity-child55 Tainted: G      D W    3.5.0-rc4-next-20120628-sasha-00005-g9f23eb7 #479
[  362.341831] RIP: 0010:[<ffffffff836258e5>]  [<ffffffff836258e5>] llcp_sock_getname+0x75/0xc0
[  362.341832] RSP: 0018:ffff8800304fde88  EFLAGS: 00010286
[  362.341834] RAX: 0000000000000000 RBX: ffff880033cb8000 RCX: 0000000000000001
[  362.341835] RDX: ffff8800304fdec4 RSI: ffff8800304fdec8 RDI: ffff8800304fdeda
[  362.341836] RBP: ffff8800304fdea8 R08: 7ebcebcb772b7ffb R09: 5fbfcb9c35bdfd53
[  362.341838] R10: 4220020c54326244 R11: 0000000000000246 R12: ffff8800304fdec8
[  362.341839] R13: ffff8800304fdec4 R14: ffff8800304fdec8 R15: 0000000000000044
[  362.341841] FS:  00007effa376e700(0000) GS:ffff880035a00000(0000) knlGS:0000000000000000
[  362.341843] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  362.341844] CR2: 0000000000000000 CR3: 0000000030438000 CR4: 00000000000406e0
[  362.341851] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  362.341856] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  362.341858] Process trinity-child55 (pid: 7816, threadinfo ffff8800304fc000, task ffff880031270000)
[  362.341858] Stack:
[  362.341862]  ffff8800304fdea8 ffff880035156780 0000000000000000 0000000000001000
[  362.341865]  ffff8800304fdf78 ffffffff83183b40 00000000304fdec8 0000006000000000
[  362.341868]  ffff8800304f0027 ffffffff83729649 ffff8800304fdee8 ffff8800304fdf48
[  362.341869] Call Trace:
[  362.341874]  [<ffffffff83183b40>] sys_getpeername+0xa0/0x110
[  362.341877]  [<ffffffff83729649>] ? _raw_spin_unlock_irq+0x59/0x80
[  362.341882]  [<ffffffff810f342b>] ? do_setitimer+0x23b/0x290
[  362.341886]  [<ffffffff81985ede>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[  362.341889]  [<ffffffff8372a539>] system_call_fastpath+0x16/0x1b
[  362.341921] Code: 84 00 00 00 00 00 b8 b3 ff ff ff 48 85 db 74 54 66 41 c7 04 24 27 00 49 8d 7c 24 12 41 c7 45 00 60 00 00 00 48 8b 83 28 05 00 00 <8b> 00 41 89 44 24 04 0f b6 83 41 05 00 00 41 88 44 24 10 0f b6
[  362.341924] RIP  [<ffffffff836258e5>] llcp_sock_getname+0x75/0xc0
[  362.341925]  RSP <ffff8800304fde88>
[  362.341926] CR2: 0000000000000000
[  362.341928] ---[ end trace 6d450e935ee18bf3 ]---

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/nfc/llcp/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
index 17a707db40eb..e06d458fc719 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp/sock.c
@@ -292,7 +292,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *addr,
 
 	pr_debug("%p\n", sk);
 
-	if (llcp_sock == NULL)
+	if (llcp_sock == NULL || llcp_sock->dev == NULL)
 		return -EBADFD;
 
 	addr->sa_family = AF_NFC;
-- 
cgit v1.2.3


From 10a9109f2705fdc3caa94d768b2559587a9a050c Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Mon, 2 Jul 2012 14:42:03 +0300
Subject: mac80211: destroy assoc_data correctly if assoc fails

If association failed due to internal error (e.g. no
supported rates IE), we call ieee80211_destroy_assoc_data()
with assoc=true, while we actually reject the association.

This results in the BSSID not being zeroed out.

After passing assoc=false, we no longer have to call
sta_info_destroy_addr() explicitly. While on it, move
the "associated" message after the assoc_success check.

Cc: stable@vger.kernel.org [3.4+]
Signed-off-by: Eliad Peller <eliad@wizery.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a4bb856de08f..0db5d34a06b6 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2174,15 +2174,13 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		       sdata->name, mgmt->sa, status_code);
 		ieee80211_destroy_assoc_data(sdata, false);
 	} else {
-		printk(KERN_DEBUG "%s: associated\n", sdata->name);
-
 		if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) {
 			/* oops -- internal error -- send timeout for now */
-			ieee80211_destroy_assoc_data(sdata, true);
-			sta_info_destroy_addr(sdata, mgmt->bssid);
+			ieee80211_destroy_assoc_data(sdata, false);
 			cfg80211_put_bss(*bss);
 			return RX_MGMT_CFG80211_ASSOC_TIMEOUT;
 		}
+		printk(KERN_DEBUG "%s: associated\n", sdata->name);
 
 		/*
 		 * destroy assoc_data afterwards, as otherwise an idle
-- 
cgit v1.2.3


From b3190466b0b6d336eddd10319c64a3ce3029b3ff Mon Sep 17 00:00:00 2001
From: Bing Zhao <bzhao@marvell.com>
Date: Tue, 3 Jul 2012 15:53:13 -0700
Subject: mwifiex: fix Coverity SCAN CID 709078: Resource leak (RESOURCE_LEAK)

> *. CID 709078: Resource leak (RESOURCE_LEAK)
> 	- drivers/net/wireless/mwifiex/cfg80211.c, line: 935
> Assigning: "bss_cfg" = storage returned from "kzalloc(132UL, 208U)"
> 	- but was not free
> drivers/net/wireless/mwifiex/cfg80211.c:935

Signed-off-by: Bing Zhao <bzhao@marvell.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/mwifiex/cfg80211.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index ce61b6fae1c9..5c7fd185373c 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -958,6 +958,7 @@ static int mwifiex_cfg80211_start_ap(struct wiphy *wiphy,
 	case NL80211_HIDDEN_SSID_ZERO_CONTENTS:
 		/* firmware doesn't support this type of hidden SSID */
 	default:
+		kfree(bss_cfg);
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From efd821182cec8c92babef6e00a95066d3252fda4 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 4 Jul 2012 13:10:02 +0200
Subject: rt2x00usb: fix indexes ordering on RX queue kick

On rt2x00_dmastart() we increase index specified by Q_INDEX and on
rt2x00_dmadone() we increase index specified by Q_INDEX_DONE. So entries
between Q_INDEX_DONE and Q_INDEX are those we currently process in the
hardware. Entries between Q_INDEX and Q_INDEX_DONE are those we can
submit to the hardware.

According to that fix rt2x00usb_kick_queue(), as we need to submit RX
entries that are not processed by the hardware. It worked before only
for empty queue, otherwise was broken.

Note that for TX queues indexes ordering are ok. We need to kick entries
that have filled skb, but was not submitted to the hardware, i.e.
started from Q_INDEX_DONE and have ENTRY_DATA_PENDING bit set.

From practical standpoint this fixes RX queue stall, usually reproducible
in AP mode, like for example reported here:
https://bugzilla.redhat.com/show_bug.cgi?id=828824

Reported-and-tested-by: Franco Miceli <fmiceli@plan.ceibal.edu.uy>
Reported-and-tested-by: Tom Horsley <horsley1953@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/rt2x00/rt2x00usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
index d357d1ed92f6..74ecc33fdd90 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
@@ -436,8 +436,8 @@ void rt2x00usb_kick_queue(struct data_queue *queue)
 	case QID_RX:
 		if (!rt2x00queue_full(queue))
 			rt2x00queue_for_each_entry(queue,
-						   Q_INDEX_DONE,
 						   Q_INDEX,
+						   Q_INDEX_DONE,
 						   NULL,
 						   rt2x00usb_kick_rx_entry);
 		break;
-- 
cgit v1.2.3


From c2ca7d92ed4bbd779516beb6eb226e19f7f7ab0f Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 4 Jul 2012 13:20:20 +0200
Subject: iwlegacy: always monitor for stuck queues

This is iwlegacy version of:

commit 342bbf3fee2fa9a18147e74b2e3c4229a4564912
Author: Johannes Berg <johannes.berg@intel.com>
Date:   Sun Mar 4 08:50:46 2012 -0800

    iwlwifi: always monitor for stuck queues

    If we only monitor while associated, the following
    can happen:
     - we're associated, and the queue stuck check
       runs, setting the queue "touch" time to X
     - we disassociate, stopping the monitoring,
       which leaves the time set to X
     - almost 2s later, we associate, and enqueue
       a frame
     - before the frame is transmitted, we monitor
       for stuck queues, and find the time set to
       X, although it is now later than X + 2000ms,
       so we decide that the queue is stuck and
       erroneously restart the device

Cc: stable@vger.kernel.org
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlegacy/common.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c
index cbf2dc18341f..5d4807c2b56d 100644
--- a/drivers/net/wireless/iwlegacy/common.c
+++ b/drivers/net/wireless/iwlegacy/common.c
@@ -4767,14 +4767,12 @@ il_bg_watchdog(unsigned long data)
 		return;
 
 	/* monitor and check for other stuck queues */
-	if (il_is_any_associated(il)) {
-		for (cnt = 0; cnt < il->hw_params.max_txq_num; cnt++) {
-			/* skip as we already checked the command queue */
-			if (cnt == il->cmd_queue)
-				continue;
-			if (il_check_stuck_queue(il, cnt))
-				return;
-		}
+	for (cnt = 0; cnt < il->hw_params.max_txq_num; cnt++) {
+		/* skip as we already checked the command queue */
+		if (cnt == il->cmd_queue)
+			continue;
+		if (il_check_stuck_queue(il, cnt))
+			return;
 	}
 
 	mod_timer(&il->watchdog,
-- 
cgit v1.2.3


From b48d96652626b315229b1b82c6270eead6a77a6d Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 4 Jul 2012 13:59:08 +0200
Subject: iwlegacy: don't mess up the SCD when removing a key

When we remove a key, we put a key index which was supposed
to tell the fw that we are actually removing the key. But
instead the fw took that index as a valid index and messed
up the SRAM of the device.

This memory corruption on the device mangled the data of
the SCD. The impact on the user is that SCD queue 2 got
stuck after having removed keys.

Reported-by: Paul Bolle <pebolle@tiscali.nl>
Cc: stable@vger.kernel.org
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlegacy/4965-mac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c
index 509301a5e7e2..ff5d689e13f3 100644
--- a/drivers/net/wireless/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/iwlegacy/4965-mac.c
@@ -3405,7 +3405,7 @@ il4965_remove_dynamic_key(struct il_priv *il,
 		return 0;
 	}
 
-	if (il->stations[sta_id].sta.key.key_offset == WEP_INVALID_OFFSET) {
+	if (il->stations[sta_id].sta.key.key_flags & STA_KEY_FLG_INVALID) {
 		IL_WARN("Removing wrong key %d 0x%x\n", keyconf->keyidx,
 			key_flags);
 		spin_unlock_irqrestore(&il->sta_lock, flags);
@@ -3420,7 +3420,7 @@ il4965_remove_dynamic_key(struct il_priv *il,
 	memset(&il->stations[sta_id].sta.key, 0, sizeof(struct il4965_keyinfo));
 	il->stations[sta_id].sta.key.key_flags =
 	    STA_KEY_FLG_NO_ENC | STA_KEY_FLG_INVALID;
-	il->stations[sta_id].sta.key.key_offset = WEP_INVALID_OFFSET;
+	il->stations[sta_id].sta.key.key_offset = keyconf->hw_key_idx;
 	il->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK;
 	il->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK;
 
-- 
cgit v1.2.3


From 8e83989106562326bfd6aaf92174fe138efd026b Mon Sep 17 00:00:00 2001
From: Deepak Sikri <deepak.sikri@st.com>
Date: Sun, 8 Jul 2012 21:14:45 +0000
Subject: stmmac: Fix for nfs hang on multiple reboot

It was observed that during multiple reboots nfs hangs. The status of
receive descriptors shows that all the descriptors were in control of
CPU, and none were assigned to DMA.
Also the DMA status register confirmed that the Rx buffer is
unavailable.

This patch adds the fix for the same by adding the memory barriers to
ascertain that the all instructions before enabling the Rx or Tx DMA are
completed which involves the proper setting of the ownership bit in DMA
descriptors.

Signed-off-by: Deepak Sikri <deepak.sikri@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 51b3b68528ee..ea3003edde18 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1212,6 +1212,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion);
 		wmb();
 		priv->hw->desc->set_tx_owner(desc);
+		wmb();
 	}
 
 	/* Interrupt on completition only for the latest segment */
@@ -1227,6 +1228,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* To avoid raise condition */
 	priv->hw->desc->set_tx_owner(first);
+	wmb();
 
 	priv->cur_tx++;
 
@@ -1290,6 +1292,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 		}
 		wmb();
 		priv->hw->desc->set_rx_owner(p + entry);
+		wmb();
 	}
 }
 
-- 
cgit v1.2.3


From 684901a6df1fb91fc9a2bdb89ffbebb241428d78 Mon Sep 17 00:00:00 2001
From: Deepak Sikri <deepak.sikri@st.com>
Date: Sun, 8 Jul 2012 21:14:46 +0000
Subject: stmmac: Fix for higher mtu size handling

For the higher mtu sizes requiring the buffer size greater than 8192,
the buffers are sent or received using multiple dma descriptors/ same
descriptor with option of multi buffer handling.
It was observed during tests that the driver was missing on data
packets during the normal ping operations if the data buffers being used
catered to jumbo frame handling.

The memory barrriers are added in between preparation of dma descriptors
in the jumbo frame handling path to ensure all instructions before
enabling the dma are complete.

Signed-off-by: Deepak Sikri <deepak.sikri@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index fb8377da1687..4b785e10f2ed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -51,7 +51,7 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
 		priv->hw->desc->prepare_tx_desc(desc, 1, bmax,
 						csum);
-
+		wmb();
 		entry = (++priv->cur_tx) % txsize;
 		desc = priv->dma_tx + entry;
 
@@ -59,6 +59,7 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 					    len, DMA_TO_DEVICE);
 		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
 		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum);
+		wmb();
 		priv->hw->desc->set_tx_owner(desc);
 		priv->tx_skbuff[entry] = NULL;
 	} else {
-- 
cgit v1.2.3


From a64d49c3dd504b685f9742a2f3dcb11fb8e4345f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 9 Jul 2012 10:51:45 +0000
Subject: bonding: Manage /proc/net/bonding/ entries from the netdev events

It was recently reported that moving a bonding device between network
namespaces causes warnings from /proc.  It turns out after the move we
were trying to add and to remove the /proc/net/bonding entries from the
wrong network namespace.

Move the bonding /proc registration code into the NETDEV_REGISTER and
NETDEV_UNREGISTER events where the proc registration and unregistration
will always happen at the right time.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b9c2ae62166d..2ee76993f052 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3227,6 +3227,12 @@ static int bond_master_netdev_event(unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGENAME:
 		return bond_event_changename(event_bond);
+	case NETDEV_UNREGISTER:
+		bond_remove_proc_entry(event_bond);
+		break;
+	case NETDEV_REGISTER:
+		bond_create_proc_entry(event_bond);
+		break;
 	default:
 		break;
 	}
@@ -4411,8 +4417,6 @@ static void bond_uninit(struct net_device *bond_dev)
 
 	bond_work_cancel_all(bond);
 
-	bond_remove_proc_entry(bond);
-
 	bond_debug_unregister(bond);
 
 	__hw_addr_flush(&bond->mc_list);
@@ -4814,7 +4818,6 @@ static int bond_init(struct net_device *bond_dev)
 
 	bond_set_lockdep_class(bond_dev);
 
-	bond_create_proc_entry(bond);
 	list_add_tail(&bond->bond_list, &bn->dev_list);
 
 	bond_prepare_sysfs_group(bond);
-- 
cgit v1.2.3


From 96ca7ffe748bf91f851e6aa4479aa11c8b1122ba Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 9 Jul 2012 10:52:43 +0000
Subject: bonding: debugfs and network namespaces are incompatible

The bonding debugfs support has been broken in the presence of network
namespaces since it has been added.  The debugfs support does not handle
multiple bonding devices with the same name in different network
namespaces.

I haven't had any bug reports, and I'm not interested in getting any.
Disable the debugfs support when network namespaces are enabled.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index 3680aa251dea..2cf084eb9d52 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c
@@ -6,7 +6,7 @@
 #include "bonding.h"
 #include "bond_alb.h"
 
-#ifdef CONFIG_DEBUG_FS
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_NET_NS)
 
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-- 
cgit v1.2.3


From 91c68ce2b26319248a32d7baa1226f819d283758 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 8 Jul 2012 21:45:10 +0000
Subject: net: cgroup: fix out of bounds accesses

dev->priomap is allocated by extend_netdev_table() called from
update_netdev_tables().
And this is only called if write_priomap() is called.

But if write_priomap() is not called, it seems we can have out of bounds
accesses in cgrp_destroy(), read_priomap() & skb_update_prio()

With help from Gao Feng

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Acked-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c            | 8 ++++++--
 net/core/netprio_cgroup.c | 4 ++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 84f01ba81a34..0f28a9e0b8ad 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2444,8 +2444,12 @@ static void skb_update_prio(struct sk_buff *skb)
 {
 	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
 
-	if ((!skb->priority) && (skb->sk) && map)
-		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+	if (!skb->priority && skb->sk && map) {
+		unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
+
+		if (prioidx < map->priomap_len)
+			skb->priority = map->priomap[prioidx];
+	}
 }
 #else
 #define skb_update_prio(skb)
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index aa907ed466ea..3e953eaddbfc 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -142,7 +142,7 @@ static void cgrp_destroy(struct cgroup *cgrp)
 	rtnl_lock();
 	for_each_netdev(&init_net, dev) {
 		map = rtnl_dereference(dev->priomap);
-		if (map)
+		if (map && cs->prioidx < map->priomap_len)
 			map->priomap[cs->prioidx] = 0;
 	}
 	rtnl_unlock();
@@ -166,7 +166,7 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft,
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev) {
 		map = rcu_dereference(dev->priomap);
-		priority = map ? map->priomap[prioidx] : 0;
+		priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0;
 		cb->fill(cb, dev->name, priority);
 	}
 	rcu_read_unlock();
-- 
cgit v1.2.3


From 1b9faf5e66bae8428a4ccdc1447d5399d1014581 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sun, 8 Jul 2012 01:37:38 +0000
Subject: drivers/isdn/mISDN/stack.c: remove invalid reference to list iterator
 variable

If list_for_each_entry, etc complete a traversal of the list, the iterator
variable ends up pointing to an address at an offset from the list head,
and not a meaningful structure.  Thus this value should not be used after
the end of the iterator.  The dereferences are just deleted from the
debugging statement.

This problem was found using Coccinelle (http://coccinelle.lip6.fr/).

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/mISDN/stack.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
index 1a0ae4445ff2..5f21f629b7ae 100644
--- a/drivers/isdn/mISDN/stack.c
+++ b/drivers/isdn/mISDN/stack.c
@@ -135,8 +135,8 @@ send_layer2(struct mISDNstack *st, struct sk_buff *skb)
 			skb = NULL;
 		else if (*debug & DEBUG_SEND_ERR)
 			printk(KERN_DEBUG
-			       "%s ch%d mgr prim(%x) addr(%x) err %d\n",
-			       __func__, ch->nr, hh->prim, ch->addr, ret);
+			       "%s mgr prim(%x) err %d\n",
+			       __func__, hh->prim, ret);
 	}
 out:
 	mutex_unlock(&st->lmutex);
-- 
cgit v1.2.3


From cae296c42c94a27e0abdea96eb762752d1ba4908 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sun, 8 Jul 2012 01:37:39 +0000
Subject: net/rxrpc/ar-peer.c: remove invalid reference to list iterator
 variable

If list_for_each_entry, etc complete a traversal of the list, the iterator
variable ends up pointing to an address at an offset from the list head,
and not a meaningful structure.  Thus this value should not be used after
the end of the iterator.  This seems to be a copy-paste bug from a previous
debugging message, and so the meaningless value is just deleted.

This problem was found using Coccinelle (http://coccinelle.lip6.fr/).

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-peer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index 2754f098d436..bebaa43484bc 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -229,7 +229,7 @@ found_UDP_peer:
 	return peer;
 
 new_UDP_peer:
-	_net("Rx UDP DGRAM from NEW peer %d", peer->debug_id);
+	_net("Rx UDP DGRAM from NEW peer");
 	read_unlock_bh(&rxrpc_peer_lock);
 	_leave(" = -EBUSY [new]");
 	return ERR_PTR(-EBUSY);
-- 
cgit v1.2.3


From 022f09784b85396b4ceba954ce28e50de4882281 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sun, 8 Jul 2012 01:37:43 +0000
Subject: drivers/net/ethernet/broadcom/cnic.c: remove invalid reference to
 list iterator variable

If list_for_each_entry, etc complete a traversal of the list, the iterator
variable ends up pointing to an address at an offset from the list head,
and not a meaningful structure.  Thus this value should not be used after
the end of the iterator.  There does not seem to be a meaningful value to
provide to netdev_warn.  Replace with pr_warn, since pr_err is used
elsewhere.

This problem was found using Coccinelle (http://coccinelle.lip6.fr/).

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/cnic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 3c95065e0def..2c89d17cbb29 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -534,7 +534,8 @@ int cnic_unregister_driver(int ulp_type)
 	}
 
 	if (atomic_read(&ulp_ops->ref_count) != 0)
-		netdev_warn(dev->netdev, "Failed waiting for ref count to go to zero\n");
+		pr_warn("%s: Failed waiting for ref count to go to zero\n",
+			__func__);
 	return 0;
 
 out_unlock:
-- 
cgit v1.2.3


From 313b037cf054ec908de92fb4c085403ffd7420d4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 5 Jul 2012 11:45:13 +0000
Subject: gianfar: fix potential sk_wmem_alloc imbalance

commit db83d136d7f753 (gianfar: Fix missing sock reference when
processing TX time stamps) added a potential sk_wmem_alloc imbalance

If the new skb has a different truesize than old one, we can get a
negative sk_wmem_alloc once new skb is orphaned at TX completion.

Now we no longer early orphan skbs in dev_hard_start_xmit(), this
probably can lead to fatal bugs.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Manfred Rudigier <manfred.rudigier@omicron.at>
Cc: Claudiu Manoil <claudiu.manoil@freescale.com>
Cc: Jiajun Wu <b06378@freescale.com>
Cc: Andy Fleming <afleming@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index f2db8fca46a1..ab1d80ff0791 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2063,10 +2063,9 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			return NETDEV_TX_OK;
 		}
 
-		/* Steal sock reference for processing TX time stamps */
-		swap(skb_new->sk, skb->sk);
-		swap(skb_new->destructor, skb->destructor);
-		kfree_skb(skb);
+		if (skb->sk)
+			skb_set_owner_w(skb_new, skb->sk);
+		consume_skb(skb);
 		skb = skb_new;
 	}
 
-- 
cgit v1.2.3


From 1ba9a294141b106b7247649a5c3372d8284eca80 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Fri, 6 Jul 2012 15:07:48 +0100
Subject: x86/mm/mtrr: Fix alignment determination in range_to_mtrr()

With the variable operated on being of "unsigned long" type,
neither ffs() nor fls() are suitable to use on them, as those
truncate their arguments to 32 bits. Using __ffs() and __fls()
respectively at once eliminates the need to subtract 1 from their
results.

Additionally, with the alignment value subsequently used as a
shift count, it must be enforced to be less than BITS_PER_LONG
(and on 64-bit there's no need for it to be any smaller).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/4FF70D54020000780008E179@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/mtrr/cleanup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index bdda2e6c673b..35ffda5d0727 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -258,11 +258,11 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
 
 		/* Compute the maximum size with which we can make a range: */
 		if (range_startk)
-			max_align = ffs(range_startk) - 1;
+			max_align = __ffs(range_startk);
 		else
-			max_align = 32;
+			max_align = BITS_PER_LONG - 1;
 
-		align = fls(range_sizek) - 1;
+		align = __fls(range_sizek);
 		if (align > max_align)
 			align = max_align;
 
-- 
cgit v1.2.3


From a7101d152665817bf7cafc47e7f5dcb1f54664fe Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Fri, 6 Jul 2012 15:20:35 +0100
Subject: x86/mm/mtrr: Slightly simplify print_mtrr_state()

high_width can be easily calculated in a single expression when
making use of __ffs64().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/4FF71053020000780008E1B5@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/mtrr/generic.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 75772ae6c65f..e9fe907cd249 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -361,11 +361,7 @@ static void __init print_mtrr_state(void)
 	}
 	pr_debug("MTRR variable ranges %sabled:\n",
 		 mtrr_state.enabled & 2 ? "en" : "dis");
-	if (size_or_mask & 0xffffffffUL)
-		high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
-	else
-		high_width = ffs(size_or_mask>>32) + 32 - 1;
-	high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
+	high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
 
 	for (i = 0; i < num_var_ranges; ++i) {
 		if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
-- 
cgit v1.2.3


From efc73f4bbc238d4f579fb612c04c8e1dd8a82979 Mon Sep 17 00:00:00 2001
From: Amir Hanania <amir.hanania@intel.com>
Date: Mon, 9 Jul 2012 20:47:19 +0000
Subject: net: Fix memory leak - vlan_info struct

In driver reload test there is a memory leak.
The structure vlan_info was not freed when the driver was removed.
It was not released since the nr_vids var is one after last vlan was removed.
The nr_vids is one, since vlan zero is added to the interface when the interface
is being set, but the vlan zero is not deleted at unregister.
Fix - delete vlan zero when we unregister the device.

Signed-off-by: Amir Hanania <amir.hanania@intel.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 6089f0cf23b4..9096bcb08132 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -403,6 +403,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_DOWN:
+		if (dev->features & NETIF_F_HW_VLAN_FILTER)
+			vlan_vid_del(dev, 0);
+
 		/* Put all VLANs for this dev in the down state too.  */
 		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
-- 
cgit v1.2.3


From b28ba72665356438e3a6e3be365c3c3071496840 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 10 Jul 2012 10:03:41 +0000
Subject: IPoIB: fix skb truesize underestimatiom

Or Gerlitz reported triggering of WARN_ON_ONCE(delta < len); in
skb_try_coalesce()
This warning tracks drivers that incorrectly set skb->truesize

IPoIB indeed allocates a full page to store a fragment, but only
accounts in skb->truesize the used part of the page (frame length)

This patch fixes skb truesize underestimation, and
also fixes a performance issue, because RX skbs have not enough tailroom
to allow IP and TCP stacks to pull their header in skb linear part
without an expensive call to pskb_expand_head()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Or Gerlitz <ogerlitz@mellanox.com>
Cc: Erez Shitrit <erezsh@mellanox.com>
Cc: Shlomo Pongartz <shlomop@mellanox.com>
Cc: Roland Dreier <roland@purestorage.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_ib.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5c1bc995e560..f10221f40803 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -123,7 +123,7 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
 
 		skb_frag_size_set(frag, size);
 		skb->data_len += size;
-		skb->truesize += size;
+		skb->truesize += PAGE_SIZE;
 	} else
 		skb_put(skb, length);
 
@@ -156,14 +156,18 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct sk_buff *skb;
 	int buf_size;
+	int tailroom;
 	u64 *mapping;
 
-	if (ipoib_ud_need_sg(priv->max_ib_mtu))
+	if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
 		buf_size = IPOIB_UD_HEAD_SIZE;
-	else
+		tailroom = 128; /* reserve some tailroom for IP/TCP headers */
+	} else {
 		buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+		tailroom = 0;
+	}
 
-	skb = dev_alloc_skb(buf_size + 4);
+	skb = dev_alloc_skb(buf_size + tailroom + 4);
 	if (unlikely(!skb))
 		return NULL;
 
-- 
cgit v1.2.3


From c1f5163de417dab01fa9daaf09a74bbb19303f3c Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Tue, 10 Jul 2012 10:04:40 +0000
Subject: bnx2: Fix bug in bnx2_free_tx_skbs().

In rare cases, bnx2x_free_tx_skbs() can unmap the wrong DMA address
when it gets to the last entry of the tx ring.  We were not using
the proper macro to skip the last entry when advancing the tx index.

Reported-by: Zongyun Lai <zlai@vmware.com>
Reviewed-by: Jeffrey Huang <huangjw@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index ac7b74488531..1fa4927a45b1 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -5372,7 +5372,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp)
 			int k, last;
 
 			if (skb == NULL) {
-				j++;
+				j = NEXT_TX_BD(j);
 				continue;
 			}
 
@@ -5384,8 +5384,8 @@ bnx2_free_tx_skbs(struct bnx2 *bp)
 			tx_buf->skb = NULL;
 
 			last = tx_buf->nr_frags;
-			j++;
-			for (k = 0; k < last; k++, j++) {
+			j = NEXT_TX_BD(j);
+			for (k = 0; k < last; k++, j = NEXT_TX_BD(j)) {
 				tx_buf = &txr->tx_buf_ring[TX_RING_IDX(j)];
 				dma_unmap_page(&bp->pdev->dev,
 					dma_unmap_addr(tx_buf, mapping),
-- 
cgit v1.2.3


From 93574fcc5b50cc7b8834698acb2ce947e5b6a5dc Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 11 Jul 2012 09:35:08 +0300
Subject: tracing: Check for allocation failure in __tracing_open()

Clean up and return -ENOMEM on if the kzalloc() fails.

This also prevents a potential crash, as the pointer that failed to
allocate would be later used.

Link: http://lkml.kernel.org/r/20120711063507.GF11812@elgon.mountain

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 814ff306ae74..a120f98c4112 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2390,6 +2390,9 @@ __tracing_open(struct inode *inode, struct file *file)
 
 	iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
 				    GFP_KERNEL);
+	if (!iter->buffer_iter)
+		goto release;
+
 	/*
 	 * We make a copy of the current tracer to avoid concurrent
 	 * changes on it while we are reading.
@@ -2451,6 +2454,7 @@ __tracing_open(struct inode *inode, struct file *file)
 	mutex_unlock(&trace_types_lock);
 	kfree(iter->trace);
 	kfree(iter->buffer_iter);
+release:
 	seq_release_private(inode, file);
 	return ERR_PTR(-ENOMEM);
 }
-- 
cgit v1.2.3


From 7ac2908e4b2edaec60e9090ddb4d9ceb76c05e7d Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 12 Jul 2012 03:39:11 +0000
Subject: sch_sfb: Fix missing NULL check

Resolves-bug: https://bugzilla.kernel.org/show_bug.cgi?id=44461

Signed-off-by: Alan Cox <alan@linux.intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 74305c883bd3..30ea4674cabd 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -570,6 +570,8 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	sch->qstats.backlog = q->qdisc->qstats.backlog;
 	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
 	if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 	return nla_nest_end(skb, opts);
-- 
cgit v1.2.3


From e3a746f5aab71f2dd0a83116772922fb37ae29d6 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 12 Jul 2012 07:40:42 +1000
Subject: xfs: really fix the cursor leak in xfs_alloc_ag_vextent_near

The current cursor is reallocated when retrying the allocation, so
the existing cursor needs to be destroyed in both the restart and
the failure cases.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_alloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9d1aeb7e2734..f654f51b0c67 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1074,13 +1074,13 @@ restart:
 	 * If we couldn't get anything, give up.
 	 */
 	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+
 		if (!forced++) {
 			trace_xfs_alloc_near_busy(args);
 			xfs_log_force(args->mp, XFS_LOG_SYNC);
 			goto restart;
 		}
-
-		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 		trace_xfs_alloc_size_neither(args);
 		args->agbno = NULLAGBLOCK;
 		return 0;
-- 
cgit v1.2.3


From aa292847b9fc6e187547110de833a7d3131bbddf Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 12 Jul 2012 07:40:43 +1000
Subject: xfs: don't defer metadata allocation to the workqueue

Almost all metadata allocations come from shallow stack usage
situations. Avoid the overhead of switching the allocation to a
workqueue as we are not in danger of running out of stack when
making these allocations. Metadata allocations are already marked
through the args that are passed down, so this is trivial to do.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reported-by: Mel Gorman <mgorman@suse.de>
Tested-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_alloc.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index f654f51b0c67..4f33c32affe3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2434,13 +2434,22 @@ xfs_alloc_vextent_worker(
 	current_restore_flags_nested(&pflags, PF_FSTRANS);
 }
 
-
-int				/* error */
+/*
+ * Data allocation requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Metadata
+ * requests, OTOH, are generally from low stack usage paths, so avoid the
+ * context switch overhead here.
+ */
+int
 xfs_alloc_vextent(
-	xfs_alloc_arg_t	*args)	/* allocation argument structure */
+	struct xfs_alloc_arg	*args)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 
+	if (!args->userdata)
+		return __xfs_alloc_vextent(args);
+
+
 	args->done = &done;
 	INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
 	queue_work(xfs_alloc_wq, &args->work);
-- 
cgit v1.2.3


From 40a9b7963df32e743c45d79a5f41445fe2476f15 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 2 Jul 2012 06:00:04 -0400
Subject: xfs: prevent recursion in xfs_buf_iorequest

If the b_iodone handler is run in calling context in xfs_buf_iorequest we
can run into a recursion where xfs_buf_iodone_callbacks keeps calling back
into xfs_buf_iorequest because an I/O error happened, which keeps calling
back into xfs_buf_iorequest.  This chain will usually not take long
because the filesystem gets shut down because of log I/O errors, but even
over a short time it can cause stack overflows if run on the same context.

As a short term workaround make sure we always call the iodone handler in
workqueue context.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_buf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a4beb421018a..687b275f165c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1243,7 +1243,7 @@ xfs_buf_iorequest(
 	 */
 	atomic_set(&bp->b_io_remaining, 1);
 	_xfs_buf_ioapply(bp);
-	_xfs_buf_ioend(bp, 0);
+	_xfs_buf_ioend(bp, 1);
 
 	xfs_buf_rele(bp);
 }
-- 
cgit v1.2.3


From 1632dcc93f55f9ab407b373da1957a727b1a7fe3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 13 Jul 2012 02:24:10 -0400
Subject: xfs: do not call xfs_bdstrat_cb in xfs_buf_iodone_callbacks

xfs_bdstrat_cb only adds a check for a shutdown filesystem over
xfs_buf_iorequest, but xfs_buf_iodone_callbacks just checked for a shut down
filesystem a little earlier.  In addition the shutdown handling in
xfs_bdstrat_cb is not very suitable for this caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_buf.c      | 51 ++++++++++++++++++++++-----------------------------
 fs/xfs/xfs_buf.h      |  1 -
 fs/xfs/xfs_buf_item.c |  2 +-
 3 files changed, 23 insertions(+), 31 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 687b275f165c..269b35c084da 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -989,27 +989,6 @@ xfs_buf_ioerror_alert(
 		(__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
 }
 
-int
-xfs_bwrite(
-	struct xfs_buf		*bp)
-{
-	int			error;
-
-	ASSERT(xfs_buf_islocked(bp));
-
-	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
-
-	xfs_bdstrat_cb(bp);
-
-	error = xfs_buf_iowait(bp);
-	if (error) {
-		xfs_force_shutdown(bp->b_target->bt_mount,
-				   SHUTDOWN_META_IO_ERROR);
-	}
-	return error;
-}
-
 /*
  * Called when we want to stop a buffer from getting written or read.
  * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
@@ -1079,14 +1058,7 @@ xfs_bioerror_relse(
 	return EIO;
 }
 
-
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer
- * after prematurely unpinning it to forcibly shutdown the filesystem.
- */
-int
+STATIC int
 xfs_bdstrat_cb(
 	struct xfs_buf	*bp)
 {
@@ -1107,6 +1079,27 @@ xfs_bdstrat_cb(
 	return 0;
 }
 
+int
+xfs_bwrite(
+	struct xfs_buf		*bp)
+{
+	int			error;
+
+	ASSERT(xfs_buf_islocked(bp));
+
+	bp->b_flags |= XBF_WRITE;
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+
+	xfs_bdstrat_cb(bp);
+
+	error = xfs_buf_iowait(bp);
+	if (error) {
+		xfs_force_shutdown(bp->b_target->bt_mount,
+				   SHUTDOWN_META_IO_ERROR);
+	}
+	return error;
+}
+
 /*
  * Wrapper around bdstrat so that we can stop data from going to disk in case
  * we are shutting down the filesystem.  Typically user data goes thru this
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7f1d1392ce37..79344c48008e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -180,7 +180,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
 extern int xfs_bwrite(struct xfs_buf *bp);
 
 extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
 
 extern void xfs_buf_ioend(xfs_buf_t *,	int);
 extern void xfs_buf_ioerror(xfs_buf_t *, int);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 45df2b857d48..d9e451115f98 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -954,7 +954,7 @@ xfs_buf_iodone_callbacks(
 
 		if (!XFS_BUF_ISSTALE(bp)) {
 			bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
-			xfs_bdstrat_cb(bp);
+			xfs_buf_iorequest(bp);
 		} else {
 			xfs_buf_relse(bp);
 		}
-- 
cgit v1.2.3


From d0efa8f23a644f7cb7d1f8e78dd9a223efa412a3 Mon Sep 17 00:00:00 2001
From: Tushar Dave <tushar.n.dave@intel.com>
Date: Thu, 12 Jul 2012 08:56:56 +0000
Subject: e1000e: Correct link check logic for 82571 serdes

SYNCH bit and IV bit of RXCW register are sticky. Before examining these bits,
RXCW should be read twice to filter out one-time false events and have correct
values for these bits. Incorrect values of these bits in link check logic can
cause weird link stability issues if auto-negotiation fails.

CC: stable <stable@vger.kernel.org> [2.6.38+]
Reported-by: Dean Nelson <dnelson@redhat.com>
Signed-off-by: Tushar Dave <tushar.n.dave@intel.com>
Reviewed-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/82571.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 36db4df09aed..1f063dcd8f85 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -1572,6 +1572,9 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)
 	ctrl = er32(CTRL);
 	status = er32(STATUS);
 	rxcw = er32(RXCW);
+	/* SYNCH bit and IV bit are sticky */
+	udelay(10);
+	rxcw = er32(RXCW);
 
 	if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) {
 
-- 
cgit v1.2.3


From a52359b56c29f55aaadf1dab80a0e1043b982676 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Sat, 14 Jul 2012 04:23:58 +0000
Subject: e1000e: fix test for PHY being accessible on 82577/8/9 and I217

Occasionally, the PHY can be initially inaccessible when the first read of
a PHY register, e.g. PHY_ID1, happens (signified by the returned value
0xFFFF) but subsequent accesses of the PHY work as expected.  Add a retry
counter similar to how it is done in the generic e1000_get_phy_id().

Also, when the PHY is completely inaccessible (i.e. when subsequent reads
of the PHY_IDx registers returns all F's) and the MDIO access mode must be
set to slow before attempting to read the PHY ID again, the functions that
do these latter two actions expect the SW/FW/HW semaphore is not already
set so the semaphore must be released before and re-acquired after calling
them otherwise there is an unnecessarily inordinate amount of delay during
device initialization.

Reported-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 42 ++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 238ab2f8a5e7..e3a7b07df629 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -325,24 +325,46 @@ static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val)
  **/
 static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw)
 {
-	u16 phy_reg;
-	u32 phy_id;
+	u16 phy_reg = 0;
+	u32 phy_id = 0;
+	s32 ret_val;
+	u16 retry_count;
+
+	for (retry_count = 0; retry_count < 2; retry_count++) {
+		ret_val = e1e_rphy_locked(hw, PHY_ID1, &phy_reg);
+		if (ret_val || (phy_reg == 0xFFFF))
+			continue;
+		phy_id = (u32)(phy_reg << 16);
 
-	e1e_rphy_locked(hw, PHY_ID1, &phy_reg);
-	phy_id = (u32)(phy_reg << 16);
-	e1e_rphy_locked(hw, PHY_ID2, &phy_reg);
-	phy_id |= (u32)(phy_reg & PHY_REVISION_MASK);
+		ret_val = e1e_rphy_locked(hw, PHY_ID2, &phy_reg);
+		if (ret_val || (phy_reg == 0xFFFF)) {
+			phy_id = 0;
+			continue;
+		}
+		phy_id |= (u32)(phy_reg & PHY_REVISION_MASK);
+		break;
+	}
 
 	if (hw->phy.id) {
 		if (hw->phy.id == phy_id)
 			return true;
-	} else {
-		if ((phy_id != 0) && (phy_id != PHY_REVISION_MASK))
-			hw->phy.id = phy_id;
+	} else if (phy_id) {
+		hw->phy.id = phy_id;
+		hw->phy.revision = (u32)(phy_reg & ~PHY_REVISION_MASK);
 		return true;
 	}
 
-	return false;
+	/*
+	 * In case the PHY needs to be in mdio slow mode,
+	 * set slow mode and try to get the PHY id again.
+	 */
+	hw->phy.ops.release(hw);
+	ret_val = e1000_set_mdio_slow_mode_hv(hw);
+	if (!ret_val)
+		ret_val = e1000e_get_phy_id(hw);
+	hw->phy.ops.acquire(hw);
+
+	return !ret_val;
 }
 
 /**
-- 
cgit v1.2.3


From a6e7360b22f2360d7640f99051415d49cebeb908 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <dong.aisheng@linaro.org>
Date: Thu, 21 Jun 2012 18:10:35 +0800
Subject: pinctrl: pinctrl-imx: only print debug message when DEBUG is defined

Fix regression for commit 3a86a5f8 (pinctrl: pinctrl-imx: free allocated
pinctrl_map structure only once and use kernel facilities for IMX_PMX_DUMP)
introduced in 3.5-rc3.
With above commit, the debug code will alway be excuted.
Change to excute it only when DEBUG is defined.

Signed-off-by: Dong Aisheng <dong.aisheng@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-imx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c
index dd6d93aa5334..90c837f469a6 100644
--- a/drivers/pinctrl/pinctrl-imx.c
+++ b/drivers/pinctrl/pinctrl-imx.c
@@ -474,7 +474,9 @@ static int __devinit imx_pinctrl_parse_groups(struct device_node *np,
 		grp->configs[j] = config & ~IMX_PAD_SION;
 	}
 
+#ifdef DEBUG
 	IMX_PMX_DUMP(info, grp->pins, grp->mux_mode, grp->configs, grp->npins);
+#endif
 
 	return 0;
 }
-- 
cgit v1.2.3


From 4a5f7eff8b0b34354e5c63272835e5e2dfe1c933 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <dong.aisheng@linaro.org>
Date: Fri, 6 Jul 2012 17:09:23 +0800
Subject: pinctrl: pinctrl-imx6q: add missed mux function for USBOTG_ID

The original pin registers table is derived from u-boot mainline,
but somehow it was found missing some mux functions for USBOTG_ID.
We added it at the bottom by following the exist pin function ids,
then it will not break the exist using of pin function id in dts file.

Reported-by: Richard Zhao <richard.zhao@freescale.com>
Signed-off-by: Dong Aisheng <dong.aisheng@linaro.org>
---
 Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt | 2 ++
 drivers/pinctrl/pinctrl-imx6q.c                                 | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt
index 82b43f915857..a4119f6422d9 100644
--- a/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt
@@ -1626,3 +1626,5 @@ MX6Q_PAD_SD2_DAT3__PCIE_CTRL_MUX_11		1587
 MX6Q_PAD_SD2_DAT3__GPIO_1_12			1588
 MX6Q_PAD_SD2_DAT3__SJC_DONE			1589
 MX6Q_PAD_SD2_DAT3__ANATOP_TESTO_3		1590
+MX6Q_PAD_ENET_RX_ER__ANATOP_USBOTG_ID		1591
+MX6Q_PAD_GPIO_1__ANATOP_USBOTG_ID		1592
diff --git a/drivers/pinctrl/pinctrl-imx6q.c b/drivers/pinctrl/pinctrl-imx6q.c
index 7737d4d71a3c..e9bf71fbedca 100644
--- a/drivers/pinctrl/pinctrl-imx6q.c
+++ b/drivers/pinctrl/pinctrl-imx6q.c
@@ -1950,6 +1950,8 @@ static struct imx_pin_reg imx6q_pin_regs[] = {
 	IMX_PIN_REG(MX6Q_PAD_SD2_DAT3, 0x0744, 0x035C, 5, 0x0000, 0), /* MX6Q_PAD_SD2_DAT3__GPIO_1_12 */
 	IMX_PIN_REG(MX6Q_PAD_SD2_DAT3, 0x0744, 0x035C, 6, 0x0000, 0), /* MX6Q_PAD_SD2_DAT3__SJC_DONE */
 	IMX_PIN_REG(MX6Q_PAD_SD2_DAT3, 0x0744, 0x035C, 7, 0x0000, 0), /* MX6Q_PAD_SD2_DAT3__ANATOP_TESTO_3 */
+	IMX_PIN_REG(MX6Q_PAD_ENET_RX_ER, 0x04EC, 0x01D8, 0, 0x0000, 0), /* MX6Q_PAD_ENET_RX_ER__ANATOP_USBOTG_ID */
+	IMX_PIN_REG(MX6Q_PAD_GPIO_1, 0x05F4, 0x0224, 3, 0x0000, 0), /* MX6Q_PAD_GPIO_1__ANATOP_USBOTG_ID */
 };
 
 /* Pad names for the pinmux subsystem */
-- 
cgit v1.2.3


From 3cc5d2a6b9a2fd1bf024aa5e52dd22961eecaf13 Mon Sep 17 00:00:00 2001
From: Mark Rustad <mark.d.rustad@intel.com>
Date: Fri, 13 Jul 2012 18:18:04 -0700
Subject: tcm_fc: Fix crash seen with aborts and large reads

This patch fixes a crash seen when large reads have their exchange
aborted by either timing out or being reset. Because the exchange
abort results in the seq pointer being set to NULL, because the
sequence is no longer valid, it must not be dereferenced. This
patch changes the function ft_get_task_tag to return ~0 if it is
unable to get the tag for this reason. Because the get_task_tag
interface provides no means of returning an error, this seems
like the best way to fix this issue at the moment.

Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/tcm_fc/tfc_cmd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index f03fb9730f5b..5b65f33939a8 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -230,6 +230,8 @@ u32 ft_get_task_tag(struct se_cmd *se_cmd)
 {
 	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
 
+	if (cmd->aborted)
+		return ~0;
 	return fc_seq_exch(cmd->seq)->rxid;
 }
 
-- 
cgit v1.2.3


From 64919e60915c5151b3dd4c8d2d9237a115ca990c Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 6 Jul 2012 14:13:29 -0400
Subject: SELinux: include definition of new capabilities

The kernel has added CAP_WAKE_ALARM and CAP_EPOLLWAKEUP.  We need to
define these in SELinux so they can be mediated by policy.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/selinux/include/classmap.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index b8c53723e09b..0b04fd9e9e3e 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -145,7 +145,9 @@ struct security_class_mapping secclass_map[] = {
 	    "node_bind", "name_connect", NULL } },
 	{ "memprotect", { "mmap_zero", NULL } },
 	{ "peer", { "recv", NULL } },
-	{ "capability2", { "mac_override", "mac_admin", "syslog", NULL } },
+	{ "capability2",
+	  { "mac_override", "mac_admin", "syslog", "wake_alarm", "epollwakeup",
+	    NULL } },
 	{ "kernel_service", { "use_as_override", "create_files_as", NULL } },
 	{ "tun_socket",
 	  { COMMON_SOCK_PERMS, NULL } },
-- 
cgit v1.2.3


From 3d2195c3324b27e65ba53d9626a6bd91a2515797 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 6 Jul 2012 14:13:30 -0400
Subject: SELinux: do not check open perms if they are not known to policy

When I introduced open perms policy didn't understand them and I
implemented them as a policycap.  When I added the checking of open perm
to truncate I forgot to conditionalize it on the userspace defined
policy capability.  Running an old policy with a new kernel will not
check open on open(2) but will check it on truncate.  Conditionalize the
truncate check the same as the open check.

Signed-off-by: Eric Paris <eparis@redhat.com>
Cc: stable@vger.kernel.org # 3.4.x
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/selinux/hooks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 372ec6502aa8..ffd8900a38e8 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2717,7 +2717,7 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 			ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
 		return dentry_has_perm(cred, dentry, FILE__SETATTR);
 
-	if (ia_valid & ATTR_SIZE)
+	if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE))
 		av |= FILE__OPEN;
 
 	return dentry_has_perm(cred, dentry, av);
-- 
cgit v1.2.3


From 46c87852e99cf8ce97e207b11cde19085837e39c Mon Sep 17 00:00:00 2001
From: Prathyush K <prathyush.k@samsung.com>
Date: Mon, 16 Jul 2012 08:59:55 +0200
Subject: ARM: dma-mapping: modify condition check while freeing pages

WARNING: at mm/vmalloc.c:1471 __iommu_free_buffer+0xcc/0xd0()
Trying to vfree() nonexistent vm area (ef095000)
Modules linked in:
[<c0015a18>] (unwind_backtrace+0x0/0xfc) from [<c0025a94>] (warn_slowpath_common+0x54/0x64)
[<c0025a94>] (warn_slowpath_common+0x54/0x64) from [<c0025b38>] (warn_slowpath_fmt+0x30/0x40)
[<c0025b38>] (warn_slowpath_fmt+0x30/0x40) from [<c0016de0>] (__iommu_free_buffer+0xcc/0xd0)
[<c0016de0>] (__iommu_free_buffer+0xcc/0xd0) from [<c0229a5c>] (exynos_drm_free_buf+0xe4/0x138)
[<c0229a5c>] (exynos_drm_free_buf+0xe4/0x138) from [<c022b358>] (exynos_drm_gem_destroy+0x80/0xfc)
[<c022b358>] (exynos_drm_gem_destroy+0x80/0xfc) from [<c0211230>] (drm_gem_object_free+0x28/0x34)
[<c0211230>] (drm_gem_object_free+0x28/0x34) from [<c0211bd0>] (drm_gem_object_release_handle+0xcc/0xd8)
[<c0211bd0>] (drm_gem_object_release_handle+0xcc/0xd8) from [<c01abe10>] (idr_for_each+0x74/0xb8)
[<c01abe10>] (idr_for_each+0x74/0xb8) from [<c02114e4>] (drm_gem_release+0x1c/0x30)
[<c02114e4>] (drm_gem_release+0x1c/0x30) from [<c0210ae8>] (drm_release+0x608/0x694)
[<c0210ae8>] (drm_release+0x608/0x694) from [<c00b75a0>] (fput+0xb8/0x228)
[<c00b75a0>] (fput+0xb8/0x228) from [<c00b40c4>] (filp_close+0x64/0x84)
[<c00b40c4>] (filp_close+0x64/0x84) from [<c0029d54>] (put_files_struct+0xe8/0x104)
[<c0029d54>] (put_files_struct+0xe8/0x104) from [<c002b930>] (do_exit+0x608/0x774)
[<c002b930>] (do_exit+0x608/0x774) from [<c002bae4>] (do_group_exit+0x48/0xb4)
[<c002bae4>] (do_group_exit+0x48/0xb4) from [<c002bb60>] (sys_exit_group+0x10/0x18)
[<c002bb60>] (sys_exit_group+0x10/0x18) from [<c000ee80>] (ret_fast_syscall+0x0/0x30)

This patch modifies the condition while freeing to match the condition
used while allocation. This fixes the above warning which arises when
array size is equal to PAGE_SIZE where allocation is done using kzalloc
but free is done using vfree.

Signed-off-by: Prathyush K <prathyush.k@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 arch/arm/mm/dma-mapping.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 4044abcf6f9d..655878bcc96d 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1091,7 +1091,7 @@ error:
 	while (--i)
 		if (pages[i])
 			__free_pages(pages[i], 0);
-	if (array_size < PAGE_SIZE)
+	if (array_size <= PAGE_SIZE)
 		kfree(pages);
 	else
 		vfree(pages);
@@ -1106,7 +1106,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s
 	for (i = 0; i < count; i++)
 		if (pages[i])
 			__free_pages(pages[i], 0);
-	if (array_size < PAGE_SIZE)
+	if (array_size <= PAGE_SIZE)
 		kfree(pages);
 	else
 		vfree(pages);
-- 
cgit v1.2.3


From 310e158cc3b7a6adf41e778d52be746c4dc88561 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 16 Jul 2012 13:15:52 +0200
Subject: net: respect GFP_DMA in __netdev_alloc_skb()

Few drivers use GFP_DMA allocations, and netdev_alloc_frag()
doesn't allocate pages in DMA zone.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 46a3d23d259e..d124306b81fd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -353,7 +353,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 	unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
 			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-	if (fragsz <= PAGE_SIZE && !(gfp_mask & __GFP_WAIT)) {
+	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
 		void *data = netdev_alloc_frag(fragsz);
 
 		if (likely(data)) {
-- 
cgit v1.2.3


From 05d290d66be6ef77a0b962ebecf01911bd984a78 Mon Sep 17 00:00:00 2001
From: Anders Kaseorg <andersk@MIT.EDU>
Date: Sun, 15 Jul 2012 17:14:25 -0400
Subject: fifo: Do not restart open() if it already found a partner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a parent and child process open the two ends of a fifo, and the
child immediately exits, the parent may receive a SIGCHLD before its
open() returns.  In that case, we need to make sure that open() will
return successfully after the SIGCHLD handler returns, instead of
throwing EINTR or being restarted.  Otherwise, the restarted open()
would incorrectly wait for a second partner on the other end.

The following test demonstrates the EINTR that was wrongly thrown from
the parent’s open().  Change .sa_flags = 0 to .sa_flags = SA_RESTART
to see a deadlock instead, in which the restarted open() waits for a
second reader that will never come.  (On my systems, this happens
pretty reliably within about 5 to 500 iterations.  Others report that
it manages to loop ~forever sometimes; YMMV.)

  #include <sys/stat.h>
  #include <sys/types.h>
  #include <sys/wait.h>
  #include <fcntl.h>
  #include <signal.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>

  #define CHECK(x) do if ((x) == -1) {perror(#x); abort();} while(0)

  void handler(int signum) {}

  int main()
  {
      struct sigaction act = {.sa_handler = handler, .sa_flags = 0};
      CHECK(sigaction(SIGCHLD, &act, NULL));
      CHECK(mknod("fifo", S_IFIFO | S_IRWXU, 0));
      for (;;) {
          int fd;
          pid_t pid;
          putc('.', stderr);
          CHECK(pid = fork());
          if (pid == 0) {
              CHECK(fd = open("fifo", O_RDONLY));
              _exit(0);
          }
          CHECK(fd = open("fifo", O_WRONLY));
          CHECK(close(fd));
          CHECK(waitpid(pid, NULL, 0));
      }
  }

This is what I suspect was causing the Git test suite to fail in
t9010-svn-fe.sh:

	http://bugs.debian.org/678852

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fifo.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/fifo.c b/fs/fifo.c
index b1a524d798e7..cf6f4345ceb0 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -14,7 +14,7 @@
 #include <linux/sched.h>
 #include <linux/pipe_fs_i.h>
 
-static void wait_for_partner(struct inode* inode, unsigned int *cnt)
+static int wait_for_partner(struct inode* inode, unsigned int *cnt)
 {
 	int cur = *cnt;	
 
@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
 		if (signal_pending(current))
 			break;
 	}
+	return cur == *cnt ? -ERESTARTSYS : 0;
 }
 
 static void wake_up_partner(struct inode* inode)
@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 				 * seen a writer */
 				filp->f_version = pipe->w_counter;
 			} else {
-				wait_for_partner(inode, &pipe->w_counter);
-				if(signal_pending(current))
+				if (wait_for_partner(inode, &pipe->w_counter))
 					goto err_rd;
 			}
 		}
@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 			wake_up_partner(inode);
 
 		if (!pipe->readers) {
-			wait_for_partner(inode, &pipe->r_counter);
-			if (signal_pending(current))
+			if (wait_for_partner(inode, &pipe->r_counter))
 				goto err_wr;
 		}
 		break;
-- 
cgit v1.2.3


From f507598b06ab00fb46495ccdeeb3ef9c1dc43dee Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 16 Jul 2012 17:51:50 +0100
Subject: gma500: Fix lid related crash

We now set up the lid timer before we set up the backlight.  On some
devices that causes a crash as we do a backlight change before or during
the setup.

As this fixes a crash on boot regression on some setups it ought to go
in ASAP, especially as all the user gets is a blank screen.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Tested-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/gma500/psb_device.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c
index eff039bf92d4..5971bc82b765 100644
--- a/drivers/gpu/drm/gma500/psb_device.c
+++ b/drivers/gpu/drm/gma500/psb_device.c
@@ -144,6 +144,10 @@ static int psb_backlight_init(struct drm_device *dev)
 	psb_backlight_device->props.max_brightness = 100;
 	backlight_update_status(psb_backlight_device);
 	dev_priv->backlight_device = psb_backlight_device;
+
+	/* This must occur after the backlight is properly initialised */
+	psb_lid_timer_init(dev_priv);
+
 	return 0;
 }
 
@@ -354,13 +358,6 @@ static int psb_chip_setup(struct drm_device *dev)
 	return 0;
 }
 
-/* Not exactly an erratum more an irritation */
-static void psb_chip_errata(struct drm_device *dev)
-{
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	psb_lid_timer_init(dev_priv);
-}
-
 static void psb_chip_teardown(struct drm_device *dev)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
@@ -379,7 +376,6 @@ const struct psb_ops psb_chip_ops = {
 	.sgx_offset = PSB_SGX_OFFSET,
 	.chip_setup = psb_chip_setup,
 	.chip_teardown = psb_chip_teardown,
-	.errata = psb_chip_errata,
 
 	.crtc_helper = &psb_intel_helper_funcs,
 	.crtc_funcs = &psb_intel_crtc_funcs,
-- 
cgit v1.2.3


From 166973e506231c496b4427760bcb5a9860422850 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 16 Jul 2012 17:52:45 +0100
Subject: gma500: move the ASLE enable

Otherwise we end up getting the masks wrong, can get events before we
are doing power control and other ungood things.  Again this is a
regression fix where the ordering of handling was disturbed by other
work, and the user experience on some boxes is a blank screen.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/gma500/opregion.c | 8 +++-----
 drivers/gpu/drm/gma500/opregion.h | 5 +++++
 drivers/gpu/drm/gma500/psb_drv.c  | 1 +
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/gma500/opregion.c b/drivers/gpu/drm/gma500/opregion.c
index 4f186eca3a30..c430bd424681 100644
--- a/drivers/gpu/drm/gma500/opregion.c
+++ b/drivers/gpu/drm/gma500/opregion.c
@@ -144,6 +144,8 @@ struct opregion_asle {
 
 #define ASLE_CBLV_VALID         (1<<31)
 
+static struct psb_intel_opregion *system_opregion;
+
 static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
@@ -205,7 +207,7 @@ void psb_intel_opregion_enable_asle(struct drm_device *dev)
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct opregion_asle *asle = dev_priv->opregion.asle;
 
-	if (asle) {
+	if (asle && system_opregion ) {
 		/* Don't do this on Medfield or other non PC like devices, they
 		   use the bit for something different altogether */
 		psb_enable_pipestat(dev_priv, 0, PIPE_LEGACY_BLC_EVENT_ENABLE);
@@ -221,7 +223,6 @@ void psb_intel_opregion_enable_asle(struct drm_device *dev)
 #define ACPI_EV_LID            (1<<1)
 #define ACPI_EV_DOCK           (1<<2)
 
-static struct psb_intel_opregion *system_opregion;
 
 static int psb_intel_opregion_video_event(struct notifier_block *nb,
 					  unsigned long val, void *data)
@@ -266,9 +267,6 @@ void psb_intel_opregion_init(struct drm_device *dev)
 		system_opregion = opregion;
 		register_acpi_notifier(&psb_intel_opregion_notifier);
 	}
-
-	if (opregion->asle)
-		psb_intel_opregion_enable_asle(dev);
 }
 
 void psb_intel_opregion_fini(struct drm_device *dev)
diff --git a/drivers/gpu/drm/gma500/opregion.h b/drivers/gpu/drm/gma500/opregion.h
index 72dc6b921265..4a90f8b0e16c 100644
--- a/drivers/gpu/drm/gma500/opregion.h
+++ b/drivers/gpu/drm/gma500/opregion.h
@@ -27,6 +27,7 @@ extern void psb_intel_opregion_asle_intr(struct drm_device *dev);
 extern void psb_intel_opregion_init(struct drm_device *dev);
 extern void psb_intel_opregion_fini(struct drm_device *dev);
 extern int psb_intel_opregion_setup(struct drm_device *dev);
+extern void psb_intel_opregion_enable_asle(struct drm_device *dev);
 
 #else
 
@@ -46,4 +47,8 @@ extern inline int psb_intel_opregion_setup(struct drm_device *dev)
 {
 	return 0;
 }
+
+extern inline void psb_intel_opregion_enable_asle(struct drm_device *dev)
+{
+}
 #endif
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index caba6e08693c..a8858a907f47 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -374,6 +374,7 @@ static int psb_driver_load(struct drm_device *dev, unsigned long chipset)
 
 	if (ret)
 		return ret;
+	psb_intel_opregion_enable_asle(dev);
 #if 0
 	/*enable runtime pm at last*/
 	pm_runtime_enable(&dev->pdev->dev);
-- 
cgit v1.2.3


From 64691959401601a1514c39cf0131533a4be53c12 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 16 Jul 2012 17:54:03 +0100
Subject: gma500,cdv: Fix the brightness base

Some desktop environments carefully save and restore the brightness
settings from the previous boot.  Unfortunately they don't all check to
see if the range has changed.  The end result is that they restore a
brightness of 100/lots not 100/100.

As the old driver and the non-free GMA36xx driver both use 0-100 we thus
need to go back doing the same thing to avoid users getting a mysterious
black screen after boot.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/gma500/cdv_device.c | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c
index 9764045428ce..b7e7b49d8f62 100644
--- a/drivers/gpu/drm/gma500/cdv_device.c
+++ b/drivers/gpu/drm/gma500/cdv_device.c
@@ -78,21 +78,6 @@ static int cdv_backlight_combination_mode(struct drm_device *dev)
 	return REG_READ(BLC_PWM_CTL2) & PWM_LEGACY_MODE;
 }
 
-static int cdv_get_brightness(struct backlight_device *bd)
-{
-	struct drm_device *dev = bl_get_data(bd);
-	u32 val = REG_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
-
-	if (cdv_backlight_combination_mode(dev)) {
-		u8 lbpc;
-
-		val &= ~1;
-		pci_read_config_byte(dev->pdev, 0xF4, &lbpc);
-		val *= lbpc;
-	}
-	return val;
-}
-
 static u32 cdv_get_max_backlight(struct drm_device *dev)
 {
 	u32 max = REG_READ(BLC_PWM_CTL);
@@ -110,6 +95,22 @@ static u32 cdv_get_max_backlight(struct drm_device *dev)
 	return max;
 }
 
+static int cdv_get_brightness(struct backlight_device *bd)
+{
+	struct drm_device *dev = bl_get_data(bd);
+	u32 val = REG_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
+
+	if (cdv_backlight_combination_mode(dev)) {
+		u8 lbpc;
+
+		val &= ~1;
+		pci_read_config_byte(dev->pdev, 0xF4, &lbpc);
+		val *= lbpc;
+	}
+	return (val * 100)/cdv_get_max_backlight(dev);
+
+}
+
 static int cdv_set_brightness(struct backlight_device *bd)
 {
 	struct drm_device *dev = bl_get_data(bd);
@@ -120,6 +121,9 @@ static int cdv_set_brightness(struct backlight_device *bd)
 	if (level < 1)
 		level = 1;
 
+	level *= cdv_get_max_backlight(dev);
+	level /= 100;
+
 	if (cdv_backlight_combination_mode(dev)) {
 		u32 max = cdv_get_max_backlight(dev);
 		u8 lbpc;
@@ -157,7 +161,6 @@ static int cdv_backlight_init(struct drm_device *dev)
 
 	cdv_backlight_device->props.brightness =
 			cdv_get_brightness(cdv_backlight_device);
-	cdv_backlight_device->props.max_brightness = cdv_get_max_backlight(dev);
 	backlight_update_status(cdv_backlight_device);
 	dev_priv->backlight_device = cdv_backlight_device;
 	return 0;
-- 
cgit v1.2.3


From 3e997130bd2e8c6f5aaa49d6e3161d4d29b43ab0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 16 Jul 2012 12:50:42 -0400
Subject: timekeeping: Add missing update call in timekeeping_resume()

The leap second rework unearthed another issue of inconsistent data.

On timekeeping_resume() the timekeeper data is updated, but nothing
calls timekeeping_update(), so now the update code in the timer
interrupt sees stale values.

This has been the case before those changes, but then the timer
interrupt was using stale data as well so this went unnoticed for quite
some time.

Add the missing update call, so all the data is consistent everywhere.

Reported-by: Andreas Schwab <schwab@linux-m68k.org>
Reported-and-tested-by: "Rafael J. Wysocki" <rjw@sisk.pl>
Reported-and-tested-by: Martin Steigerwald <Martin@lichtvoll.de>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Linux PM list <linux-pm@vger.kernel.org>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/time/timekeeping.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 269b1fe5f2ae..3447cfaf11e7 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -717,6 +717,7 @@ static void timekeeping_resume(void)
 	timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
 	timekeeper.ntp_error = 0;
 	timekeeping_suspended = 0;
+	timekeeping_update(false);
 	write_sequnlock_irqrestore(&timekeeper.lock, flags);
 
 	touch_softlockup_watchdog();
-- 
cgit v1.2.3


From d35212f3ca3bf4fb49d15e37f530c9931e2d2183 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Mon, 16 Jul 2012 15:17:10 -0700
Subject: target: Clean up returning errors in PR handling code

 - instead of (PTR_ERR(file) < 0) just use IS_ERR(file)
 - return -EINVAL instead of EINVAL
 - all other error returns in target_scsi3_emulate_pr_out() use
   "goto out" -- get rid of the one remaining straight "return."

Signed-off-by: Roland Dreier <roland@purestorage.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_pr.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 85564998500a..a1bcd927a9e6 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -2031,7 +2031,7 @@ static int __core_scsi3_write_aptpl_to_file(
 	if (IS_ERR(file) || !file || !file->f_dentry) {
 		pr_err("filp_open(%s) for APTPL metadata"
 			" failed\n", path);
-		return (PTR_ERR(file) < 0 ? PTR_ERR(file) : -ENOENT);
+		return IS_ERR(file) ? PTR_ERR(file) : -ENOENT;
 	}
 
 	iov[0].iov_base = &buf[0];
@@ -3818,7 +3818,7 @@ int target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 			" SPC-2 reservation is held, returning"
 			" RESERVATION_CONFLICT\n");
 		cmd->scsi_sense_reason = TCM_RESERVATION_CONFLICT;
-		ret = EINVAL;
+		ret = -EINVAL;
 		goto out;
 	}
 
@@ -3828,7 +3828,8 @@ int target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 	 */
 	if (!cmd->se_sess) {
 		cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	if (cmd->data_length < 24) {
-- 
cgit v1.2.3


From 1765fe5edcb83f53fc67edeb559fcf4bc82c6460 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Mon, 16 Jul 2012 17:10:17 -0700
Subject: target: Fix range calculation in WRITE SAME emulation when num blocks
 == 0

When NUMBER OF LOGICAL BLOCKS is 0, WRITE SAME is supposed to write
all the blocks from the specified LBA through the end of the device.
However, dev->transport->get_blocks(dev) (perhaps confusingly) returns
the last valid LBA rather than the number of blocks, so the correct
number of blocks to write starting with lba is

dev->transport->get_blocks(dev) - lba + 1

(nab: Backport roland's for-3.6 patch to for-3.5)

Signed-off-by: Roland Dreier <roland@purestorage.com>
Cc: Cc: <stable@vger.kernel.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_cdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c
index 9888693a18fe..664f6e775d0e 100644
--- a/drivers/target/target_core_cdb.c
+++ b/drivers/target/target_core_cdb.c
@@ -1095,7 +1095,7 @@ int target_emulate_write_same(struct se_cmd *cmd)
 	if (num_blocks != 0)
 		range = num_blocks;
 	else
-		range = (dev->transport->get_blocks(dev) - lba);
+		range = (dev->transport->get_blocks(dev) - lba) + 1;
 
 	pr_debug("WRITE_SAME UNMAP: LBA: %llu Range: %llu\n",
 		 (unsigned long long)lba, (unsigned long long)range);
-- 
cgit v1.2.3


From ffc61ccbb96809df8d97ed609ac86b509eaf9056 Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Wed, 11 Jul 2012 12:28:05 +0100
Subject: Initialise mid_q_entry before putting it on the pending queue

A user reported a crash in cifs_demultiplex_thread() caused by an
incorrectly set mid_q_entry->callback() function. It appears that the
callback assignment made in cifs_call_async() was not flushed back to
memory suggesting that a memory barrier was required here. Changing the
code to make sure that the mid_q_entry structure was completely
initialised before it was added to the pending queue fixes the problem.

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/transport.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 3097ee58fd7d..f25d4ea14be4 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -365,16 +365,14 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
 	if (mid == NULL)
 		return -ENOMEM;
 
-	/* put it on the pending_mid_q */
-	spin_lock(&GlobalMid_Lock);
-	list_add_tail(&mid->qhead, &server->pending_mid_q);
-	spin_unlock(&GlobalMid_Lock);
-
 	rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number);
-	if (rc)
-		delete_mid(mid);
+	if (rc) {
+		DeleteMidQEntry(mid);
+		return rc;
+	}
+
 	*ret_mid = mid;
-	return rc;
+	return 0;
 }
 
 /*
@@ -407,17 +405,21 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
 	mid->callback_data = cbdata;
 	mid->mid_state = MID_REQUEST_SUBMITTED;
 
+	/* put it on the pending_mid_q */
+	spin_lock(&GlobalMid_Lock);
+	list_add_tail(&mid->qhead, &server->pending_mid_q);
+	spin_unlock(&GlobalMid_Lock);
+
+
 	cifs_in_send_inc(server);
 	rc = smb_sendv(server, iov, nvec);
 	cifs_in_send_dec(server);
 	cifs_save_when_sent(mid);
 	mutex_unlock(&server->srv_mutex);
 
-	if (rc)
-		goto out_err;
+	if (rc == 0)
+		return 0;
 
-	return rc;
-out_err:
 	delete_mid(mid);
 	add_credits(server, 1);
 	wake_up(&server->request_q);
-- 
cgit v1.2.3


From 3ae629d98bd5ed77585a878566f04f310adbc591 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 11 Jul 2012 09:09:35 -0400
Subject: cifs: on CONFIG_HIGHMEM machines, limit the rsize/wsize to the kmap
 space

We currently rely on being able to kmap all of the pages in an async
read or write request. If you're on a machine that has CONFIG_HIGHMEM
set then that kmap space is limited, sometimes to as low as 512 slots.

With 512 slots, we can only support up to a 2M r/wsize, and that's
assuming that we can get our greedy little hands on all of them. There
are other users however, so it's possible we'll end up stuck with a
size that large.

Since we can't handle a rsize or wsize larger than that currently, cap
those options at the number of kmap slots we have. We could consider
capping it even lower, but we currently default to a max of 1M. Might as
well allow those luddites on 32 bit arches enough rope to hang
themselves.

A more robust fix would be to teach the send and receive routines how
to contend with an array of pages so we don't need to marshal up a kvec
array at all. That's a fairly significant overhaul though, so we'll need
this limit in place until that's ready.

Cc: <stable@vger.kernel.org>
Reported-by: Jian Li <jiali@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/connect.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0ae86ddf2213..94b7788c3189 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3445,6 +3445,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
 #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
 
+/*
+ * On hosts with high memory, we can't currently support wsize/rsize that are
+ * larger than we can kmap at once. Cap the rsize/wsize at
+ * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
+ * larger than that anyway.
+ */
+#ifdef CONFIG_HIGHMEM
+#define CIFS_KMAP_SIZE_LIMIT	(LAST_PKMAP * PAGE_CACHE_SIZE)
+#else /* CONFIG_HIGHMEM */
+#define CIFS_KMAP_SIZE_LIMIT	(1<<24)
+#endif /* CONFIG_HIGHMEM */
+
 static unsigned int
 cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
 {
@@ -3475,6 +3487,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
 		wsize = min_t(unsigned int, wsize,
 				server->maxBuf - sizeof(WRITE_REQ) + 4);
 
+	/* limit to the amount that we can kmap at once */
+	wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
+
 	/* hard limit of CIFS_MAX_WSIZE */
 	wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
 
@@ -3516,6 +3531,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
 	if (!(server->capabilities & CAP_LARGE_READ_X))
 		rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
 
+	/* limit to the amount that we can kmap at once */
+	rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
+
 	/* hard limit of CIFS_MAX_RSIZE */
 	rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
 
-- 
cgit v1.2.3


From 3cf003c08be785af4bee9ac05891a15bcbff856a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 11 Jul 2012 09:09:36 -0400
Subject: cifs: when CONFIG_HIGHMEM is set, serialize the read/write kmaps

Jian found that when he ran fsx on a 32 bit arch with a large wsize the
process and one of the bdi writeback kthreads would sometimes deadlock
with a stack trace like this:

crash> bt
PID: 2789   TASK: f02edaa0  CPU: 3   COMMAND: "fsx"
 #0 [eed63cbc] schedule at c083c5b3
 #1 [eed63d80] kmap_high at c0500ec8
 #2 [eed63db0] cifs_async_writev at f7fabcd7 [cifs]
 #3 [eed63df0] cifs_writepages at f7fb7f5c [cifs]
 #4 [eed63e50] do_writepages at c04f3e32
 #5 [eed63e54] __filemap_fdatawrite_range at c04e152a
 #6 [eed63ea4] filemap_fdatawrite at c04e1b3e
 #7 [eed63eb4] cifs_file_aio_write at f7fa111a [cifs]
 #8 [eed63ecc] do_sync_write at c052d202
 #9 [eed63f74] vfs_write at c052d4ee
#10 [eed63f94] sys_write at c052df4c
#11 [eed63fb0] ia32_sysenter_target at c0409a98
    EAX: 00000004  EBX: 00000003  ECX: abd73b73  EDX: 012a65c6
    DS:  007b      ESI: 012a65c6  ES:  007b      EDI: 00000000
    SS:  007b      ESP: bf8db178  EBP: bf8db1f8  GS:  0033
    CS:  0073      EIP: 40000424  ERR: 00000004  EFLAGS: 00000246

Each task would kmap part of its address array before getting stuck, but
not enough to actually issue the write.

This patch fixes this by serializing the marshal_iov operations for
async reads and writes. The idea here is to ensure that cifs
aggressively tries to populate a request before attempting to fulfill
another one. As soon as all of the pages are kmapped for a request, then
we can unlock and allow another one to proceed.

There's no need to do this serialization on non-CONFIG_HIGHMEM arches
however, so optimize all of this out when CONFIG_HIGHMEM isn't set.

Cc: <stable@vger.kernel.org>
Reported-by: Jian Li <jiali@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifssmb.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5b400730c213..4ee522b3f66f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -86,7 +86,31 @@ static struct {
 #endif /* CONFIG_CIFS_WEAK_PW_HASH */
 #endif /* CIFS_POSIX */
 
-/* Forward declarations */
+#ifdef CONFIG_HIGHMEM
+/*
+ * On arches that have high memory, kmap address space is limited. By
+ * serializing the kmap operations on those arches, we ensure that we don't
+ * end up with a bunch of threads in writeback with partially mapped page
+ * arrays, stuck waiting for kmap to come back. That situation prevents
+ * progress and can deadlock.
+ */
+static DEFINE_MUTEX(cifs_kmap_mutex);
+
+static inline void
+cifs_kmap_lock(void)
+{
+	mutex_lock(&cifs_kmap_mutex);
+}
+
+static inline void
+cifs_kmap_unlock(void)
+{
+	mutex_unlock(&cifs_kmap_mutex);
+}
+#else /* !CONFIG_HIGHMEM */
+#define cifs_kmap_lock() do { ; } while(0)
+#define cifs_kmap_unlock() do { ; } while(0)
+#endif /* CONFIG_HIGHMEM */
 
 /* Mark as invalid, all open files on tree connections since they
    were closed when session to server was lost */
@@ -1503,7 +1527,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	}
 
 	/* marshal up the page array */
+	cifs_kmap_lock();
 	len = rdata->marshal_iov(rdata, data_len);
+	cifs_kmap_unlock();
 	data_len -= len;
 
 	/* issue the read if we have any iovecs left to fill */
@@ -2069,7 +2095,9 @@ cifs_async_writev(struct cifs_writedata *wdata)
 	 * and set the iov_len properly for each one. It may also set
 	 * wdata->bytes too.
 	 */
+	cifs_kmap_lock();
 	wdata->marshal_iov(iov, wdata);
+	cifs_kmap_unlock();
 
 	cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
 
-- 
cgit v1.2.3


From cd60042cc1392e79410dc8de9e9c1abb38a29e57 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 6 Jul 2012 07:09:42 -0400
Subject: cifs: always update the inode cache with the results from a FIND_*

When we get back a FIND_FIRST/NEXT result, we have some info about the
dentry that we use to instantiate a new inode. We were ignoring and
discarding that info when we had an existing dentry in the cache.

Fix this by updating the inode in place when we find an existing dentry
and the uniqueid is the same.

Cc: <stable@vger.kernel.org> # .31.x
Reported-and-Tested-by: Andrew Bartlett <abartlet@samba.org>
Reported-by: Bill Robertson <bill_robertson@debortoli.com.au>
Reported-by: Dion Edwards <dion_edwards@debortoli.com.au>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/readdir.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 0a8224d1c4c5..a4217f02fab2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
 
 	dentry = d_lookup(parent, name);
 	if (dentry) {
-		/* FIXME: check for inode number changes? */
-		if (dentry->d_inode != NULL)
+		inode = dentry->d_inode;
+		/* update inode in place if i_ino didn't change */
+		if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
+			cifs_fattr_to_inode(inode, fattr);
 			return dentry;
+		}
 		d_drop(dentry);
 		dput(dentry);
 	}
-- 
cgit v1.2.3


From 2eebc1e188e9e45886ee00662519849339884d6d Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 16 Jul 2012 09:13:51 +0000
Subject: sctp: Fix list corruption resulting from freeing an association on a
 list

A few days ago Dave Jones reported this oops:

[22766.294255] general protection fault: 0000 [#1] PREEMPT SMP
[22766.295376] CPU 0
[22766.295384] Modules linked in:
[22766.387137]  ffffffffa169f292 6b6b6b6b6b6b6b6b ffff880147c03a90
ffff880147c03a74
[22766.387135] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 00000000000
[22766.387136] Process trinity-watchdo (pid: 10896, threadinfo ffff88013e7d2000,
[22766.387137] Stack:
[22766.387140]  ffff880147c03a10
[22766.387140]  ffffffffa169f2b6
[22766.387140]  ffff88013ed95728
[22766.387143]  0000000000000002
[22766.387143]  0000000000000000
[22766.387143]  ffff880003fad062
[22766.387144]  ffff88013c120000
[22766.387144]
[22766.387145] Call Trace:
[22766.387145]  <IRQ>
[22766.387150]  [<ffffffffa169f292>] ? __sctp_lookup_association+0x62/0xd0
[sctp]
[22766.387154]  [<ffffffffa169f2b6>] __sctp_lookup_association+0x86/0xd0 [sctp]
[22766.387157]  [<ffffffffa169f597>] sctp_rcv+0x207/0xbb0 [sctp]
[22766.387161]  [<ffffffff810d4da8>] ? trace_hardirqs_off_caller+0x28/0xd0
[22766.387163]  [<ffffffff815827e3>] ? nf_hook_slow+0x133/0x210
[22766.387166]  [<ffffffff815902fc>] ? ip_local_deliver_finish+0x4c/0x4c0
[22766.387168]  [<ffffffff8159043d>] ip_local_deliver_finish+0x18d/0x4c0
[22766.387169]  [<ffffffff815902fc>] ? ip_local_deliver_finish+0x4c/0x4c0
[22766.387171]  [<ffffffff81590a07>] ip_local_deliver+0x47/0x80
[22766.387172]  [<ffffffff8158fd80>] ip_rcv_finish+0x150/0x680
[22766.387174]  [<ffffffff81590c54>] ip_rcv+0x214/0x320
[22766.387176]  [<ffffffff81558c07>] __netif_receive_skb+0x7b7/0x910
[22766.387178]  [<ffffffff8155856c>] ? __netif_receive_skb+0x11c/0x910
[22766.387180]  [<ffffffff810d423e>] ? put_lock_stats.isra.25+0xe/0x40
[22766.387182]  [<ffffffff81558f83>] netif_receive_skb+0x23/0x1f0
[22766.387183]  [<ffffffff815596a9>] ? dev_gro_receive+0x139/0x440
[22766.387185]  [<ffffffff81559280>] napi_skb_finish+0x70/0xa0
[22766.387187]  [<ffffffff81559cb5>] napi_gro_receive+0xf5/0x130
[22766.387218]  [<ffffffffa01c4679>] e1000_receive_skb+0x59/0x70 [e1000e]
[22766.387242]  [<ffffffffa01c5aab>] e1000_clean_rx_irq+0x28b/0x460 [e1000e]
[22766.387266]  [<ffffffffa01c9c18>] e1000e_poll+0x78/0x430 [e1000e]
[22766.387268]  [<ffffffff81559fea>] net_rx_action+0x1aa/0x3d0
[22766.387270]  [<ffffffff810a495f>] ? account_system_vtime+0x10f/0x130
[22766.387273]  [<ffffffff810734d0>] __do_softirq+0xe0/0x420
[22766.387275]  [<ffffffff8169826c>] call_softirq+0x1c/0x30
[22766.387278]  [<ffffffff8101db15>] do_softirq+0xd5/0x110
[22766.387279]  [<ffffffff81073bc5>] irq_exit+0xd5/0xe0
[22766.387281]  [<ffffffff81698b03>] do_IRQ+0x63/0xd0
[22766.387283]  [<ffffffff8168ee2f>] common_interrupt+0x6f/0x6f
[22766.387283]  <EOI>
[22766.387284]
[22766.387285]  [<ffffffff8168eed9>] ? retint_swapgs+0x13/0x1b
[22766.387285] Code: c0 90 5d c3 66 0f 1f 44 00 00 4c 89 c8 5d c3 0f 1f 00 55 48
89 e5 48 83
ec 20 48 89 5d e8 4c 89 65 f0 4c 89 6d f8 66 66 66 66 90 <0f> b7 87 98 00 00 00
48 89 fb
49 89 f5 66 c1 c0 08 66 39 46 02
[22766.387307]
[22766.387307] RIP
[22766.387311]  [<ffffffffa168a2c9>] sctp_assoc_is_match+0x19/0x90 [sctp]
[22766.387311]  RSP <ffff880147c039b0>
[22766.387142]  ffffffffa16ab120
[22766.599537] ---[ end trace 3f6dae82e37b17f5 ]---
[22766.601221] Kernel panic - not syncing: Fatal exception in interrupt

It appears from his analysis and some staring at the code that this is likely
occuring because an association is getting freed while still on the
sctp_assoc_hashtable.  As a result, we get a gpf when traversing the hashtable
while a freed node corrupts part of the list.

Nominally I would think that an mibalanced refcount was responsible for this,
but I can't seem to find any obvious imbalance.  What I did note however was
that the two places where we create an association using
sctp_primitive_ASSOCIATE (__sctp_connect and sctp_sendmsg), have failure paths
which free a newly created association after calling sctp_primitive_ASSOCIATE.
sctp_primitive_ASSOCIATE brings us into the sctp_sf_do_prm_asoc path, which
issues a SCTP_CMD_NEW_ASOC side effect, which in turn adds a new association to
the aforementioned hash table.  the sctp command interpreter that process side
effects has not way to unwind previously processed commands, so freeing the
association from the __sctp_connect or sctp_sendmsg error path would lead to a
freed association remaining on this hash table.

I've fixed this but modifying sctp_[un]hash_established to use hlist_del_init,
which allows us to proerly use hlist_unhashed to check if the node is on a
hashlist safely during a delete.  That in turn alows us to safely call
sctp_unhash_established in the __sctp_connect and sctp_sendmsg error paths
before freeing them, regardles of what the associations state is on the hash
list.

I noted, while I was doing this, that the __sctp_unhash_endpoint was using
hlist_unhsashed in a simmilar fashion, but never nullified any removed nodes
pointers to make that function work properly, so I fixed that up in a simmilar
fashion.

I attempted to test this using a virtual guest running the SCTP_RR test from
netperf in a loop while running the trinity fuzzer, both in a loop.  I wasn't
able to recreate the problem prior to this fix, nor was I able to trigger the
failure after (neither of which I suppose is suprising).  Given the trace above
however, I think its likely that this is what we hit.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: davej@redhat.com
CC: davej@redhat.com
CC: "David S. Miller" <davem@davemloft.net>
CC: Vlad Yasevich <vyasevich@gmail.com>
CC: Sridhar Samudrala <sri@us.ibm.com>
CC: linux-sctp@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c  |  7 ++-----
 net/sctp/socket.c | 12 ++++++++++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index 80564fe03024..8b9b6790a3df 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -736,15 +736,12 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
 
 	epb = &ep->base;
 
-	if (hlist_unhashed(&epb->node))
-		return;
-
 	epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
 
 	head = &sctp_ep_hashtable[epb->hashent];
 
 	sctp_write_lock(&head->lock);
-	__hlist_del(&epb->node);
+	hlist_del_init(&epb->node);
 	sctp_write_unlock(&head->lock);
 }
 
@@ -825,7 +822,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc)
 	head = &sctp_assoc_hashtable[epb->hashent];
 
 	sctp_write_lock(&head->lock);
-	__hlist_del(&epb->node);
+	hlist_del_init(&epb->node);
 	sctp_write_unlock(&head->lock);
 }
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b3b8a8d813eb..31c7bfcd9b58 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1231,8 +1231,14 @@ out_free:
 	SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p"
 			  " kaddrs: %p err: %d\n",
 			  asoc, kaddrs, err);
-	if (asoc)
+	if (asoc) {
+		/* sctp_primitive_ASSOCIATE may have added this association
+		 * To the hash table, try to unhash it, just in case, its a noop
+		 * if it wasn't hashed so we're safe
+		 */
+		sctp_unhash_established(asoc);
 		sctp_association_free(asoc);
+	}
 	return err;
 }
 
@@ -1942,8 +1948,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	goto out_unlock;
 
 out_free:
-	if (new_asoc)
+	if (new_asoc) {
+		sctp_unhash_established(asoc);
 		sctp_association_free(asoc);
+	}
 out_unlock:
 	sctp_release_sock(sk);
 
-- 
cgit v1.2.3


From 936597631dd310e220544dc5c6075d924efd39b2 Mon Sep 17 00:00:00 2001
From: Narendra K <narendra_k@dell.com>
Date: Mon, 16 Jul 2012 15:24:41 +0000
Subject: ixgbevf: Prevent RX/TX statistics getting reset to zero

The commit 4197aa7bb81877ebb06e4f2cc1b5fea2da23a7bd implements 64 bit
per ring statistics. But the driver resets the 'total_bytes' and
'total_packets' from RX and TX rings in the RX and TX interrupt
handlers to zero. This results in statistics being lost and user space
reporting RX and TX statistics as zero. This patch addresses the
issue by preventing the resetting of RX and TX ring statistics to
zero.

Signed-off-by: Narendra K <narendra_k@dell.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index f69ec4288b10..8b304a43a22d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -969,8 +969,6 @@ static irqreturn_t ixgbevf_msix_clean_tx(int irq, void *data)
 	r_idx = find_first_bit(q_vector->txr_idx, adapter->num_tx_queues);
 	for (i = 0; i < q_vector->txr_count; i++) {
 		tx_ring = &(adapter->tx_ring[r_idx]);
-		tx_ring->total_bytes = 0;
-		tx_ring->total_packets = 0;
 		ixgbevf_clean_tx_irq(adapter, tx_ring);
 		r_idx = find_next_bit(q_vector->txr_idx, adapter->num_tx_queues,
 				      r_idx + 1);
@@ -994,16 +992,6 @@ static irqreturn_t ixgbevf_msix_clean_rx(int irq, void *data)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ixgbevf_ring  *rx_ring;
 	int r_idx;
-	int i;
-
-	r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues);
-	for (i = 0; i < q_vector->rxr_count; i++) {
-		rx_ring = &(adapter->rx_ring[r_idx]);
-		rx_ring->total_bytes = 0;
-		rx_ring->total_packets = 0;
-		r_idx = find_next_bit(q_vector->rxr_idx, adapter->num_rx_queues,
-				      r_idx + 1);
-	}
 
 	if (!q_vector->rxr_count)
 		return IRQ_HANDLED;
-- 
cgit v1.2.3


From ef209f15980360f6945873df3cd710c5f62f2a3e Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Wed, 11 Jul 2012 21:50:15 +0000
Subject: net: cgroup: fix access the unallocated memory in netprio cgroup

there are some out of bound accesses in netprio cgroup.

now before accessing the dev->priomap.priomap array,we only check
if the dev->priomap exist.and because we don't want to see
additional bound checkings in fast path, so we should make sure
that dev->priomap is null or array size of dev->priomap.priomap
is equal to max_prioidx + 1;

so in write_priomap logic,we should call extend_netdev_table when
dev->priomap is null and dev->priomap.priomap_len < max_len.
and in cgrp_create->update_netdev_tables logic,we should call
extend_netdev_table only when dev->priomap exist and
dev->priomap.priomap_len < max_len.

and it's not needed to call update_netdev_tables in write_priomap,
we can only allocate the net device's priomap which we change through
net_prio.ifpriomap.

this patch also add a return value for update_netdev_tables &
extend_netdev_table, so when new_priomap is allocated failed,
write_priomap will stop to access the priomap,and return -ENOMEM
back to the userspace to tell the user what happend.

Change From v3:
1. add rtnl protect when reading max_prioidx in write_priomap.

2. only call extend_netdev_table when map->priomap_len < max_len,
   this will make sure array size of dev->map->priomap always
   bigger than any prioidx.

3. add a function write_update_netdev_table to make codes clear.

Change From v2:
1. protect extend_netdev_table by RTNL.
2. when extend_netdev_table failed,call dev_put to reduce device's refcount.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netprio_cgroup.c | 71 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 54 insertions(+), 17 deletions(-)

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 3e953eaddbfc..b2e9caa1ad1a 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -65,7 +65,7 @@ static void put_prioidx(u32 idx)
 	spin_unlock_irqrestore(&prioidx_map_lock, flags);
 }
 
-static void extend_netdev_table(struct net_device *dev, u32 new_len)
+static int extend_netdev_table(struct net_device *dev, u32 new_len)
 {
 	size_t new_size = sizeof(struct netprio_map) +
 			   ((sizeof(u32) * new_len));
@@ -77,7 +77,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)
 
 	if (!new_priomap) {
 		pr_warn("Unable to alloc new priomap!\n");
-		return;
+		return -ENOMEM;
 	}
 
 	for (i = 0;
@@ -90,46 +90,79 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len)
 	rcu_assign_pointer(dev->priomap, new_priomap);
 	if (old_priomap)
 		kfree_rcu(old_priomap, rcu);
+	return 0;
 }
 
-static void update_netdev_tables(void)
+static int write_update_netdev_table(struct net_device *dev)
 {
+	int ret = 0;
+	u32 max_len;
+	struct netprio_map *map;
+
+	rtnl_lock();
+	max_len = atomic_read(&max_prioidx) + 1;
+	map = rtnl_dereference(dev->priomap);
+	if (!map || map->priomap_len < max_len)
+		ret = extend_netdev_table(dev, max_len);
+	rtnl_unlock();
+
+	return ret;
+}
+
+static int update_netdev_tables(void)
+{
+	int ret = 0;
 	struct net_device *dev;
-	u32 max_len = atomic_read(&max_prioidx) + 1;
+	u32 max_len;
 	struct netprio_map *map;
 
 	rtnl_lock();
+	max_len = atomic_read(&max_prioidx) + 1;
 	for_each_netdev(&init_net, dev) {
 		map = rtnl_dereference(dev->priomap);
-		if ((!map) ||
-		    (map->priomap_len < max_len))
-			extend_netdev_table(dev, max_len);
+		/*
+		 * don't allocate priomap if we didn't
+		 * change net_prio.ifpriomap (map == NULL),
+		 * this will speed up skb_update_prio.
+		 */
+		if (map && map->priomap_len < max_len) {
+			ret = extend_netdev_table(dev, max_len);
+			if (ret < 0)
+				break;
+		}
 	}
 	rtnl_unlock();
+	return ret;
 }
 
 static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
 {
 	struct cgroup_netprio_state *cs;
-	int ret;
+	int ret = -EINVAL;
 
 	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 	if (!cs)
 		return ERR_PTR(-ENOMEM);
 
-	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
-		kfree(cs);
-		return ERR_PTR(-EINVAL);
-	}
+	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx)
+		goto out;
 
 	ret = get_prioidx(&cs->prioidx);
-	if (ret != 0) {
+	if (ret < 0) {
 		pr_warn("No space in priority index array\n");
-		kfree(cs);
-		return ERR_PTR(ret);
+		goto out;
+	}
+
+	ret = update_netdev_tables();
+	if (ret < 0) {
+		put_prioidx(cs->prioidx);
+		goto out;
 	}
 
 	return &cs->css;
+out:
+	kfree(cs);
+	return ERR_PTR(ret);
 }
 
 static void cgrp_destroy(struct cgroup *cgrp)
@@ -221,13 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 	if (!dev)
 		goto out_free_devname;
 
-	update_netdev_tables();
-	ret = 0;
+	ret = write_update_netdev_table(dev);
+	if (ret < 0)
+		goto out_put_dev;
+
 	rcu_read_lock();
 	map = rcu_dereference(dev->priomap);
 	if (map)
 		map->priomap[prioidx] = priority;
 	rcu_read_unlock();
+
+out_put_dev:
 	dev_put(dev);
 
 out_free_devname:
-- 
cgit v1.2.3


From 96f80d123eff05c3cd4701463786b87952a6c3ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= <sjur.brandeland@stericsson.com>
Date: Sun, 15 Jul 2012 10:10:14 +0000
Subject: caif: Fix access to freed pernet memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

unregister_netdevice_notifier() must be called before
unregister_pernet_subsys() to avoid accessing already freed
pernet memory. This fixes the following oops when doing rmmod:

Call Trace:
 [<ffffffffa0f802bd>] caif_device_notify+0x4d/0x5a0 [caif]
 [<ffffffff81552ba9>] unregister_netdevice_notifier+0xb9/0x100
 [<ffffffffa0f86dcc>] caif_device_exit+0x1c/0x250 [caif]
 [<ffffffff810e7734>] sys_delete_module+0x1a4/0x300
 [<ffffffff810da82d>] ? trace_hardirqs_on_caller+0x15d/0x1e0
 [<ffffffff813517de>] ? trace_hardirqs_on_thunk+0x3a/0x3
 [<ffffffff81696bad>] system_call_fastpath+0x1a/0x1f

RIP
 [<ffffffffa0f7f561>] caif_get+0x51/0xb0 [caif]

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 554b31289607..8c83c175b03a 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -561,9 +561,9 @@ static int __init caif_device_init(void)
 
 static void __exit caif_device_exit(void)
 {
-	unregister_pernet_subsys(&caif_net_ops);
 	unregister_netdevice_notifier(&caif_device_notifier);
 	dev_remove_pack(&caif_packet_type);
+	unregister_pernet_subsys(&caif_net_ops);
 }
 
 module_init(caif_device_init);
-- 
cgit v1.2.3


From 68653359beb59014b88e4f297e467730a1680996 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Fri, 13 Jul 2012 20:15:34 +0000
Subject: MAINTAINERS: reflect actual changes in IEEE 802.15.4 maintainership

As the life flows, developers priorities shifts a bit. Reflect actual
changes in the maintainership of IEEE 802.15.4 code: Sergey mostly
stopped cared about this piece of code. Most of the work recently was
done by Alexander, so put him to the MAINTAINERS file to reflect his
status and to ease the life of respective patches.

Also add new net/mac802154/ directory to the list of maintained files.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 03df1d15ebf3..3064a9c3b3e7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3433,13 +3433,14 @@ S:	Supported
 F:	drivers/idle/i7300_idle.c
 
 IEEE 802.15.4 SUBSYSTEM
+M:	Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
 M:	Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
-M:	Sergey Lapin <slapin@ossfans.org>
 L:	linux-zigbee-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://apps.sourceforge.net/trac/linux-zigbee
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lowpan/lowpan.git
 S:	Maintained
 F:	net/ieee802154/
+F:	net/mac802154/
 F:	drivers/ieee802154/
 
 IIO SUBSYSTEM AND DRIVERS
-- 
cgit v1.2.3


From ef764a13b8f4bc4b532a51bc37c35b3974831b29 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Fri, 13 Jul 2012 06:33:08 +0000
Subject: ax25: Fix missing break

At least there seems to be no reason to disallow ROSE sockets when
NETROM is loaded.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/af_ax25.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 051f7abae66d..779095ded689 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -842,6 +842,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
 		case AX25_P_NETROM:
 			if (ax25_protocol_is_registered(AX25_P_NETROM))
 				return -ESOCKTNOSUPPORT;
+			break;
 #endif
 #ifdef CONFIG_ROSE_MODULE
 		case AX25_P_ROSE:
-- 
cgit v1.2.3


From 10cc1bdd5ef65f60f570b594c4c066d763c128fb Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 16 Jul 2012 23:44:48 +0000
Subject: ixgbevf: Fix panic when loading driver

This patch addresses a kernel panic seen when setting up the interface.
Specifically we see a NULL pointer dereference on the Tx descriptor cleanup
path when enabling interrupts.  This change corrects that so it cannot
occur.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 8b304a43a22d..41e32257a4e8 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -201,6 +201,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_adapter *adapter,
 	unsigned int i, eop, count = 0;
 	unsigned int total_bytes = 0, total_packets = 0;
 
+	if (test_bit(__IXGBEVF_DOWN, &adapter->state))
+		return true;
+
 	i = tx_ring->next_to_clean;
 	eop = tx_ring->tx_buffer_info[i].next_to_watch;
 	eop_desc = IXGBE_TX_DESC_ADV(*tx_ring, eop);
-- 
cgit v1.2.3


From 9e33ce453f8ac8452649802bee1f410319408f4b Mon Sep 17 00:00:00 2001
From: Lin Ming <mlin@ss.pku.edu.cn>
Date: Sat, 7 Jul 2012 18:26:10 +0800
Subject: ipvs: fix oops on NAT reply in br_nf context

IPVS should not reset skb->nf_bridge in FORWARD hook
by calling nf_reset for NAT replies. It triggers oops in
br_nf_forward_finish.

[  579.781508] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
[  579.781669] IP: [<ffffffff817b1ca5>] br_nf_forward_finish+0x58/0x112
[  579.781792] PGD 218f9067 PUD 0
[  579.781865] Oops: 0000 [#1] SMP
[  579.781945] CPU 0
[  579.781983] Modules linked in:
[  579.782047]
[  579.782080]
[  579.782114] Pid: 4644, comm: qemu Tainted: G        W    3.5.0-rc5-00006-g95e69f9 #282 Hewlett-Packard  /30E8
[  579.782300] RIP: 0010:[<ffffffff817b1ca5>]  [<ffffffff817b1ca5>] br_nf_forward_finish+0x58/0x112
[  579.782455] RSP: 0018:ffff88007b003a98  EFLAGS: 00010287
[  579.782541] RAX: 0000000000000008 RBX: ffff8800762ead00 RCX: 000000000001670a
[  579.782653] RDX: 0000000000000000 RSI: 000000000000000a RDI: ffff8800762ead00
[  579.782845] RBP: ffff88007b003ac8 R08: 0000000000016630 R09: ffff88007b003a90
[  579.782957] R10: ffff88007b0038e8 R11: ffff88002da37540 R12: ffff88002da01a02
[  579.783066] R13: ffff88002da01a80 R14: ffff88002d83c000 R15: ffff88002d82a000
[  579.783177] FS:  0000000000000000(0000) GS:ffff88007b000000(0063) knlGS:00000000f62d1b70
[  579.783306] CS:  0010 DS: 002b ES: 002b CR0: 000000008005003b
[  579.783395] CR2: 0000000000000004 CR3: 00000000218fe000 CR4: 00000000000027f0
[  579.783505] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  579.783684] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  579.783795] Process qemu (pid: 4644, threadinfo ffff880021b20000, task ffff880021aba760)
[  579.783919] Stack:
[  579.783959]  ffff88007693cedc ffff8800762ead00 ffff88002da01a02 ffff8800762ead00
[  579.784110]  ffff88002da01a02 ffff88002da01a80 ffff88007b003b18 ffffffff817b26c7
[  579.784260]  ffff880080000000 ffffffff81ef59f0 ffff8800762ead00 ffffffff81ef58b0
[  579.784477] Call Trace:
[  579.784523]  <IRQ>
[  579.784562]
[  579.784603]  [<ffffffff817b26c7>] br_nf_forward_ip+0x275/0x2c8
[  579.784707]  [<ffffffff81704b58>] nf_iterate+0x47/0x7d
[  579.784797]  [<ffffffff817ac32e>] ? br_dev_queue_push_xmit+0xae/0xae
[  579.784906]  [<ffffffff81704bfb>] nf_hook_slow+0x6d/0x102
[  579.784995]  [<ffffffff817ac32e>] ? br_dev_queue_push_xmit+0xae/0xae
[  579.785175]  [<ffffffff8187fa95>] ? _raw_write_unlock_bh+0x19/0x1b
[  579.785179]  [<ffffffff817ac417>] __br_forward+0x97/0xa2
[  579.785179]  [<ffffffff817ad366>] br_handle_frame_finish+0x1a6/0x257
[  579.785179]  [<ffffffff817b2386>] br_nf_pre_routing_finish+0x26d/0x2cb
[  579.785179]  [<ffffffff817b2cf0>] br_nf_pre_routing+0x55d/0x5c1
[  579.785179]  [<ffffffff81704b58>] nf_iterate+0x47/0x7d
[  579.785179]  [<ffffffff817ad1c0>] ? br_handle_local_finish+0x44/0x44
[  579.785179]  [<ffffffff81704bfb>] nf_hook_slow+0x6d/0x102
[  579.785179]  [<ffffffff817ad1c0>] ? br_handle_local_finish+0x44/0x44
[  579.785179]  [<ffffffff81551525>] ? sky2_poll+0xb35/0xb54
[  579.785179]  [<ffffffff817ad62a>] br_handle_frame+0x213/0x229
[  579.785179]  [<ffffffff817ad417>] ? br_handle_frame_finish+0x257/0x257
[  579.785179]  [<ffffffff816e3b47>] __netif_receive_skb+0x2b4/0x3f1
[  579.785179]  [<ffffffff816e69fc>] process_backlog+0x99/0x1e2
[  579.785179]  [<ffffffff816e6800>] net_rx_action+0xdf/0x242
[  579.785179]  [<ffffffff8107e8a8>] __do_softirq+0xc1/0x1e0
[  579.785179]  [<ffffffff8135a5ba>] ? trace_hardirqs_off_thunk+0x3a/0x6c
[  579.785179]  [<ffffffff8188812c>] call_softirq+0x1c/0x30

The steps to reproduce as follow,

1. On Host1, setup brige br0(192.168.1.106)
2. Boot a kvm guest(192.168.1.105) on Host1 and start httpd
3. Start IPVS service on Host1
   ipvsadm -A -t 192.168.1.106:80 -s rr
   ipvsadm -a -t 192.168.1.106:80 -r 192.168.1.105:80 -m
4. Run apache benchmark on Host2(192.168.1.101)
   ab -n 1000 http://192.168.1.106/

ip_vs_reply4
  ip_vs_out
    handle_response
      ip_vs_notrack
        nf_reset()
        {
          skb->nf_bridge = NULL;
        }

Actually, IPVS wants in this case just to replace nfct
with untracked version. So replace the nf_reset(skb) call
in ip_vs_notrack() with a nf_conntrack_put(skb->nfct) call.

Signed-off-by: Lin Ming <mlin@ss.pku.edu.cn>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index d6146b4811c2..95374d1696a1 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1425,7 +1425,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
 	if (!ct || !nf_ct_is_untracked(ct)) {
-		nf_reset(skb);
+		nf_conntrack_put(skb->nfct);
 		skb->nfct = &nf_ct_untracked_get()->ct_general;
 		skb->nfctinfo = IP_CT_NEW;
 		nf_conntrack_get(skb->nfct);
-- 
cgit v1.2.3


From 283283c4da91adc44b03519f434ee1e7e91d6fdb Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 7 Jul 2012 20:30:11 +0300
Subject: ipvs: fix oops in ip_vs_dst_event on rmmod

	After commit 39f618b4fd95ae243d940ec64c961009c74e3333 (3.4)
"ipvs: reset ipvs pointer in netns" we can oops in
ip_vs_dst_event on rmmod ip_vs because ip_vs_control_cleanup
is called after the ipvs_core_ops subsys is unregistered and
net->ipvs is NULL. Fix it by exiting early from ip_vs_dst_event
if ipvs is NULL. It is safe because all services and dests
for the net are already freed.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index d43e3c122f7b..84444dda194b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1521,11 +1521,12 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = ptr;
 	struct net *net = dev_net(dev);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_service *svc;
 	struct ip_vs_dest *dest;
 	unsigned int idx;
 
-	if (event != NETDEV_UNREGISTER)
+	if (event != NETDEV_UNREGISTER || !ipvs)
 		return NOTIFY_DONE;
 	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
 	EnterFunction(2);
@@ -1551,7 +1552,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
 		}
 	}
 
-	list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
+	list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
 		__ip_vs_dev_reset(dest, dev);
 	}
 	mutex_unlock(&__ip_vs_mutex);
-- 
cgit v1.2.3


From 9634252617441991b01dacaf4040866feecaf36f Mon Sep 17 00:00:00 2001
From: Federico Fuga <fuga@studiofuga.com>
Date: Mon, 16 Jul 2012 10:36:51 +0300
Subject: rpmsg: fix dependency on initialization order

When rpmsg drivers are built into the kernel, they must not initialize
before the rpmsg bus does, otherwise they'd trigger a BUG() in
drivers/base/driver.c line 169 (driver_register()).

To fix that, and to stop depending on arbitrary linkage ordering of
those built-in rpmsg drivers, we make the rpmsg bus initialize at
subsys_initcall.

Cc: stable <stable@vger.kernel.org>
Signed-off-by: Federico Fuga <fuga@studiofuga.com>
[ohad: rewrite the commit log]
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
---
 drivers/rpmsg/virtio_rpmsg_bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 39d3aa41adda..f56c8ba3a861 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -1085,7 +1085,7 @@ static int __init rpmsg_init(void)
 
 	return ret;
 }
-module_init(rpmsg_init);
+subsys_initcall(rpmsg_init);
 
 static void __exit rpmsg_fini(void)
 {
-- 
cgit v1.2.3


From d9914cf66181b8aa0929775f5c6f675c6ebc3eb5 Mon Sep 17 00:00:00 2001
From: Michael Kerrisk <mtk.manpages@gmail.com>
Date: Tue, 17 Jul 2012 21:37:27 +0200
Subject: PM: Rename CAP_EPOLLWAKEUP to CAP_BLOCK_SUSPEND

As discussed in
http://thread.gmane.org/gmane.linux.kernel/1249726/focus=1288990,
the capability introduced in 4d7e30d98939a0340022ccd49325a3d70f7e0238
to govern EPOLLWAKEUP seems misnamed: this capability is about governing
the ability to suspend the system, not using a particular API flag
(EPOLLWAKEUP). We should make the name of the capability more general
to encourage reuse in related cases. (Whether or not this capability
should also be used to govern the use of /sys/power/wake_lock is a
question that needs to be separately resolved.)

This patch renames the capability to CAP_BLOCK_SUSPEND. In order to ensure
that the old capability name doesn't make it out into the wild, could you
please apply and push up the tree to ensure that it is incorporated
for the 3.5 release.

Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 fs/eventpoll.c             | 2 +-
 include/linux/capability.h | 6 +++---
 include/linux/eventpoll.h  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 74598f67efeb..1c8b55670804 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 		goto error_tgt_fput;
 
 	/* Check if EPOLLWAKEUP is allowed */
-	if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP))
+	if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
 		epds.events &= ~EPOLLWAKEUP;
 
 	/*
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 68d56effc328..d10b7ed595b1 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -360,11 +360,11 @@ struct cpu_vfs_cap_data {
 
 #define CAP_WAKE_ALARM            35
 
-/* Allow preventing system suspends while epoll events are pending */
+/* Allow preventing system suspends */
 
-#define CAP_EPOLLWAKEUP      36
+#define CAP_BLOCK_SUSPEND    36
 
-#define CAP_LAST_CAP         CAP_EPOLLWAKEUP
+#define CAP_LAST_CAP         CAP_BLOCK_SUSPEND
 
 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
 
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 6f8be328770a..f4bb378ccf6a 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -34,7 +34,7 @@
  * re-allowed until epoll_wait is called again after consuming the wakeup
  * event(s).
  *
- * Requires CAP_EPOLLWAKEUP
+ * Requires CAP_BLOCK_SUSPEND
  */
 #define EPOLLWAKEUP (1 << 29)
 
-- 
cgit v1.2.3


From c8f4a2d095bcb7ff798f984b9c7d16b4c8d194c3 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 17 Jul 2012 15:47:51 -0700
Subject: bootmem: make ___alloc_bootmem_node_nopanic() really nopanic

In reaction to commit 99ab7b19440a ("mm: sparse: fix usemap allocation
above node descriptor section") Johannes said:
| while backporting the below patch, I realised that your fix busted
| f5bf18fa22f8 again.  The problem was not a panicking version on
| allocation failure but when the usemap size was too large such that
| goal + size > limit triggers the BUG_ON in the bootmem allocator.  So
| we need a version that passes limit ONLY if the usemap is smaller than
| the section.

after checking the code, the name of ___alloc_bootmem_node_nopanic()
does not reflect the fact.

Make bootmem really not panic.

Hope will kill bootmem sooner.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: <stable@vger.kernel.org>    [3.3.x, 3.4.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/bootmem.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 73096630cb35..bcb63ac48cc5 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -710,6 +710,10 @@ again:
 	if (ptr)
 		return ptr;
 
+	/* do not panic in alloc_bootmem_bdata() */
+	if (limit && goal + size > limit)
+		limit = 0;
+
 	ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit);
 	if (ptr)
 		return ptr;
-- 
cgit v1.2.3


From b45f9330b4e08ee4de5e2f5cae75200edb283a96 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 17 Jul 2012 15:47:54 -0700
Subject: mn10300: fix "pull clearing RESTORE_SIGMASK into block_sigmask()"
 fallout

Commit a610d6e672d6 ("pull clearing RESTORE_SIGMASK into
block_sigmask()") caused:

  arch/mn10300/kernel/signal.c: In function 'handle_signal':
  arch/mn10300/kernel/signal.c:462:3: warning: 'return' with no value, in function returning non-void [-Wreturn-type]

Add the missing return values, and restore the indentation while we're
at it.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mn10300/kernel/signal.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index 6ab0bee2a54f..4d584ae29ae1 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -459,10 +459,11 @@ static int handle_signal(int sig,
 	else
 		ret = setup_frame(sig, ka, oldset, regs);
 	if (ret)
-		return;
+		return ret;
 
 	signal_delivered(sig, info, ka, regs,
-				 test_thread_flag(TIF_SINGLESTEP));
+			 test_thread_flag(TIF_SINGLESTEP));
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3


From 07f604ccfdff2afe8224aee9322080a7c4c2014c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 17 Jul 2012 15:47:57 -0700
Subject: m32r: remove duplicate definition of PTRACE_O_TRACESYSGOOD

Fix the m32r build warning:

  include/linux/ptrace.h:66:0: warning: "PTRACE_O_TRACESYSGOOD" redefined [enabled by default]
  arch/m32r/include/asm/ptrace.h:117:0: note: this is the location of the previous definition

We already have it in <linux/ptrace.h>, so remove it from <asm/ptrace.h>

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m32r/include/asm/ptrace.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/m32r/include/asm/ptrace.h b/arch/m32r/include/asm/ptrace.h
index 527527584dd0..4313aa62b51b 100644
--- a/arch/m32r/include/asm/ptrace.h
+++ b/arch/m32r/include/asm/ptrace.h
@@ -113,9 +113,6 @@ struct pt_regs {
 
 #define PTRACE_OLDSETOPTIONS	21
 
-/* options set using PTRACE_SETOPTIONS */
-#define PTRACE_O_TRACESYSGOOD	0x00000001
-
 #ifdef __KERNEL__
 
 #include <asm/m32r.h>		/* M32R_PSW_BSM, M32R_PSW_BPM */
-- 
cgit v1.2.3


From f9717f3110c8d05ad65b9a91b698cc0612fd77af Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 17 Jul 2012 15:47:59 -0700
Subject: m32r: fix pull clearing RESTORE_SIGMASK into block_sigmask() fallout

Commit a610d6e672d6 ("pull clearing RESTORE_SIGMASK into
block_sigmask()") caused:

  arch/m32r/kernel/signal.c: In function 'handle_signal':
  arch/m32r/kernel/signal.c:289:6: warning: 'return' with a value, in function returning void [enabled by default]

Remove the return value it forgot to remove.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m32r/kernel/signal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index f3fb2c029cfc..d0f60b97bbc5 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -286,7 +286,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 			case -ERESTARTNOINTR:
 				regs->r0 = regs->orig_r0;
 				if (prev_insn(regs) < 0)
-					return -EFAULT;
+					return;
 		}
 	}
 
-- 
cgit v1.2.3


From a6b202979661eb32646048aeaad7be7b70c2cd22 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 17 Jul 2012 15:48:00 -0700
Subject: m32r: fix 'fix breakage from "m32r: use generic ptrace_resume code"'
 fallout

Commit acdc0d5ef9dd ('m32r: fix breakage from "m32r: use generic
ptrace_resume code"') tried to fix a problem in commit e34112e3966fc
("m32r: use generic ptrace_resume code") by returning values in a
function returning void, causing:

  arch/m32r/kernel/ptrace.c: In function 'user_enable_single_step':
  arch/m32r/kernel/ptrace.c:594:3: warning: 'return' with a value, in function returning void [enabled by default]
  arch/m32r/kernel/ptrace.c:598:3: warning: 'return' with a value, in function returning void [enabled by default]
  arch/m32r/kernel/ptrace.c:601:3: warning: 'return' with a value, in function returning void [enabled by default]
  arch/m32r/kernel/ptrace.c:604:2: warning: 'return' with a value, in function returning void [enabled by default]

Remove the unneeded return values.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m32r/kernel/ptrace.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 4c03361537aa..51f5e9aa4901 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -591,17 +591,16 @@ void user_enable_single_step(struct task_struct *child)
 
 	if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0)
 	    != sizeof(insn))
-		return -EIO;
+		return;
 
 	compute_next_pc(insn, pc, &next_pc, child);
 	if (next_pc & 0x80000000)
-		return -EIO;
+		return;
 
 	if (embed_debug_trap(child, next_pc))
-		return -EIO;
+		return;
 
 	invalidate_cache();
-	return 0;
 }
 
 void user_disable_single_step(struct task_struct *child)
-- 
cgit v1.2.3


From df12aef6a19bb2d69859a94936bda0e6ccaf3327 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 17 Jul 2012 15:48:02 -0700
Subject: m32r: consistently use "suffix-$(...)"

Commit a556bec9955c ("m32r: fix arch/m32r/boot/compressed/Makefile")
changed "$(suffix_y)" to "$(suffix-y)", but didn't update any location
where "suffix_y" is set, causing:

  make[5]: *** No rule to make target `arch/m32r/boot/compressed/vmlinux.bin.', needed by `arch/m32r/boot/compressed/piggy.o'.  Stop.
  make[4]: *** [arch/m32r/boot/compressed/vmlinux] Error 2
  make[3]: *** [zImage] Error 2

Correct the other locations to fix this.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m32r/boot/compressed/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/m32r/boot/compressed/Makefile b/arch/m32r/boot/compressed/Makefile
index 177716b1d613..01729c2979ba 100644
--- a/arch/m32r/boot/compressed/Makefile
+++ b/arch/m32r/boot/compressed/Makefile
@@ -43,9 +43,9 @@ endif
 
 OBJCOPYFLAGS += -R .empty_zero_page
 
-suffix_$(CONFIG_KERNEL_GZIP)	= gz
-suffix_$(CONFIG_KERNEL_BZIP2)	= bz2
-suffix_$(CONFIG_KERNEL_LZMA)	= lzma
+suffix-$(CONFIG_KERNEL_GZIP)	= gz
+suffix-$(CONFIG_KERNEL_BZIP2)	= bz2
+suffix-$(CONFIG_KERNEL_LZMA)	= lzma
 
 $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE
 	$(call if_changed,ld)
-- 
cgit v1.2.3


From a8abbca6617e1caa2344d2d38d0a35f3e5928b79 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 17 Jul 2012 15:48:04 -0700
Subject: m32r: add memcpy() for CONFIG_KERNEL_GZIP=y

Fix the m32r link error:

    LD      arch/m32r/boot/compressed/vmlinux
  arch/m32r/boot/compressed/misc.o: In function `zlib_updatewindow':
  misc.c:(.text+0x190): undefined reference to `memcpy'
  misc.c:(.text+0x190): relocation truncated to fit: R_M32R_26_PLTREL against undefined symbol `memcpy'
  make[5]: *** [arch/m32r/boot/compressed/vmlinux] Error 1

by adding our own implementation of memcpy().

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m32r/boot/compressed/misc.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 370d60881977..3147aa248b93 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -39,6 +39,16 @@ static void *memset(void *s, int c, size_t n)
 #endif
 
 #ifdef CONFIG_KERNEL_GZIP
+void *memcpy(void *dest, const void *src, size_t n)
+{
+	char *d = dest;
+	const char *s = src;
+	while (n--)
+		*d++ = *s++;
+
+	return dest;
+}
+
 #define BOOT_HEAP_SIZE             0x10000
 #include "../../../../lib/decompress_inflate.c"
 #endif
-- 
cgit v1.2.3


From 9a75c6e5240f7edc5955e8da5b94bde6f96070b3 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 17 Jul 2012 15:48:05 -0700
Subject: m32r: make memset() global for CONFIG_KERNEL_BZIP2=y

Fix the m32r compile error:

  arch/m32r/boot/compressed/misc.c:31:14: error: static declaration of 'memset' follows non-static declaration
  make[5]: *** [arch/m32r/boot/compressed/misc.o] Error 1
  make[4]: *** [arch/m32r/boot/compressed/vmlinux] Error 2

by removing the static keyword.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m32r/boot/compressed/misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 3147aa248b93..28a09529f206 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -28,7 +28,7 @@ static unsigned long free_mem_ptr;
 static unsigned long free_mem_end_ptr;
 
 #ifdef CONFIG_KERNEL_BZIP2
-static void *memset(void *s, int c, size_t n)
+void *memset(void *s, int c, size_t n)
 {
 	char *ss = s;
 
-- 
cgit v1.2.3


From 1c7e7f6c0703d03af6bcd5ccc11fc15d23e5ecbe Mon Sep 17 00:00:00 2001
From: Aaditya Kumar <aaditya.kumar.30@gmail.com>
Date: Tue, 17 Jul 2012 15:48:07 -0700
Subject: mm: fix lost kswapd wakeup in kswapd_stop()

Offlining memory may block forever, waiting for kswapd() to wake up
because kswapd() does not check the event kthread->should_stop before
sleeping.

The proper pattern, from Documentation/memory-barriers.txt, is:

   ---  waker  ---
   event_indicated = 1;
   wake_up_process(event_daemon);

   ---  sleeper  ---
   for (;;) {
      set_current_state(TASK_UNINTERRUPTIBLE);
      if (event_indicated)
         break;
      schedule();
   }

   set_current_state() may be wrapped by:
      prepare_to_wait();

In the kswapd() case, event_indicated is kthread->should_stop.

  === offlining memory (waker) ===
   kswapd_stop()
      kthread_stop()
         kthread->should_stop = 1
         wake_up_process()
         wait_for_completion()

  ===  kswapd_try_to_sleep (sleeper) ===
   kswapd_try_to_sleep()
      prepare_to_wait()
           .
           .
      schedule()
           .
           .
      finish_wait()

The schedule() needs to be protected by a test of kthread->should_stop,
which is wrapped by kthread_should_stop().

Reproducer:
   Do heavy file I/O in background.
   Do a memory offline/online in a tight loop

Signed-off-by: Aaditya Kumar <aaditya.kumar@ap.sony.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 661576324c7f..66e431060c05 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2688,7 +2688,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 		 * them before going back to sleep.
 		 */
 		set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
-		schedule();
+
+		if (!kthread_should_stop())
+			schedule();
+
 		set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);
 	} else {
 		if (remaining)
-- 
cgit v1.2.3


From 5bdca4e0768d3e0f4efa43d9a2cc8210aeb91ab9 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Tue, 10 Jul 2012 11:53:34 -0700
Subject: libceph: fix messenger retry

In ancient times, the messenger could both initiate and accept connections.
An artifact if that was data structures to store/process an incoming
ceph_msg_connect request and send an outgoing ceph_msg_connect_reply.
Sadly, the negotiation code was referencing those structures and ignoring
important information (like the peer's connect_seq) from the correct ones.

Among other things, this fixes tight reconnect loops where the server sends
RETRY_SESSION and we (the client) retries with the same connect_seq as last
time.  This bug pretty easily triggered by injecting socket failures on the
MDS and running some fs workload like workunits/direct_io/test_sync_io.

Signed-off-by: Sage Weil <sage@inktank.com>
---
 include/linux/ceph/messenger.h | 12 ++----------
 net/ceph/messenger.c           | 12 ++++++------
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 2521a95fa6d9..44c87e731e9d 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -163,16 +163,8 @@ struct ceph_connection {
 
 	/* connection negotiation temps */
 	char in_banner[CEPH_BANNER_MAX_LEN];
-	union {
-		struct {  /* outgoing connection */
-			struct ceph_msg_connect out_connect;
-			struct ceph_msg_connect_reply in_reply;
-		};
-		struct {  /* incoming */
-			struct ceph_msg_connect in_connect;
-			struct ceph_msg_connect_reply out_reply;
-		};
-	};
+	struct ceph_msg_connect out_connect;
+	struct ceph_msg_connect_reply in_reply;
 	struct ceph_entity_addr actual_peer_addr;
 
 	/* message out temps */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b332c3d76059..10255e81be79 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1423,7 +1423,7 @@ static int process_connect(struct ceph_connection *con)
 		 * dropped messages.
 		 */
 		dout("process_connect got RESET peer seq %u\n",
-		     le32_to_cpu(con->in_connect.connect_seq));
+		     le32_to_cpu(con->in_reply.connect_seq));
 		pr_err("%s%lld %s connection reset\n",
 		       ENTITY_NAME(con->peer_name),
 		       ceph_pr_addr(&con->peer_addr.in_addr));
@@ -1450,10 +1450,10 @@ static int process_connect(struct ceph_connection *con)
 		 * If we sent a smaller connect_seq than the peer has, try
 		 * again with a larger value.
 		 */
-		dout("process_connect got RETRY my seq = %u, peer_seq = %u\n",
+		dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
 		     le32_to_cpu(con->out_connect.connect_seq),
-		     le32_to_cpu(con->in_connect.connect_seq));
-		con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
+		     le32_to_cpu(con->in_reply.connect_seq));
+		con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
 		ceph_con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
@@ -1468,9 +1468,9 @@ static int process_connect(struct ceph_connection *con)
 		 */
 		dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
 		     con->peer_global_seq,
-		     le32_to_cpu(con->in_connect.global_seq));
+		     le32_to_cpu(con->in_reply.global_seq));
 		get_global_seq(con->msgr,
-			       le32_to_cpu(con->in_connect.global_seq));
+			       le32_to_cpu(con->in_reply.global_seq));
 		ceph_con_out_kvec_reset(con);
 		ret = prepare_write_connect(con);
 		if (ret < 0)
-- 
cgit v1.2.3


From 236df3755d944c33120d77e84b5ff6f3917eb307 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 6 Jun 2012 09:15:33 -0500
Subject: rbd: Fix ceph_snap_context size calculation

ceph_snap_context->snaps is an u64 array

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Reviewed-by: Alex Elder <elder@inktank.com>
(cherry picked from commit f9f9a1904467816452fc70740165030e84c2c659)
---
 drivers/block/rbd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 65665c9c42c6..8b9c1734d807 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -499,7 +499,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
 			 / sizeof (*ondisk))
 		return -EINVAL;
 	header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
-				snap_count * sizeof (*ondisk),
+				snap_count * sizeof(u64),
 				gfp_flags);
 	if (!header->snapc)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 6a3ca4f18873f950895cb64ddefafb51a732e3f7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 6 Jun 2012 09:15:33 -0500
Subject: rbd: endian bug in rbd_req_cb()

Sparse complains about this because:
drivers/block/rbd.c:996:20: warning: cast to restricted __le32
drivers/block/rbd.c:996:20: warning: cast from restricted __le16

These are set in osd_req_encode_op() and they are le16.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Alex Elder <elder@inktank.com>
(cherry picked from commit 895cfcc810e53d7d36639969c71efb9087221167)
---
 drivers/block/rbd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8b9c1734d807..8f428a8ab003 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -977,7 +977,7 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
 	op = (void *)(replyhead + 1);
 	rc = le32_to_cpu(replyhead->result);
 	bytes = le64_to_cpu(op->extent.length);
-	read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
+	read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ);
 
 	dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
 
-- 
cgit v1.2.3


From 5cb6a9bccaa65e0cbf567485ac6d04ca3fdae79c Mon Sep 17 00:00:00 2001
From: Vipul Kumar Samar <vipulkumar.samar@st.com>
Date: Tue, 10 Jul 2012 17:12:43 +0530
Subject: clk:spear1340:Fix: Rename clk ids within predefined limit

The max limit of con_id is 16 and dev_id is 20. As of now for spear1340, many
clk ids are exceeding this predefined limit.

This patch rename clk ids like:
    mux_clk -> _mclk
    gate_clk -> _gclk
    synth_clk -> syn_clk
    gmac_phy -> phy_
    gmii_125m_pad_ -> gmii_pad

Signed-off-by: Vipul Kumar Samar <vipulkumar.samar@st.com>
Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/clk/spear/spear1340_clock.c | 273 ++++++++++++++++++------------------
 1 file changed, 135 insertions(+), 138 deletions(-)

diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c
index e3ea72162236..0f2324b9321b 100644
--- a/drivers/clk/spear/spear1340_clock.c
+++ b/drivers/clk/spear/spear1340_clock.c
@@ -370,26 +370,24 @@ static struct frac_rate_tbl gen_rtbl[] = {
 /* clock parents */
 static const char *vco_parents[] = { "osc_24m_clk", "osc_25m_clk", };
 static const char *sys_parents[] = { "none", "pll1_clk", "none", "none",
-	"sys_synth_clk", "none", "pll2_clk", "pll3_clk", };
-static const char *ahb_parents[] = { "cpu_div3_clk", "amba_synth_clk", };
+	"sys_syn_clk", "none", "pll2_clk", "pll3_clk", };
+static const char *ahb_parents[] = { "cpu_div3_clk", "amba_syn_clk", };
 static const char *gpt_parents[] = { "osc_24m_clk", "apb_clk", };
 static const char *uart0_parents[] = { "pll5_clk", "osc_24m_clk",
-	"uart0_synth_gate_clk", };
+	"uart0_syn_gclk", };
 static const char *uart1_parents[] = { "pll5_clk", "osc_24m_clk",
-	"uart1_synth_gate_clk", };
-static const char *c3_parents[] = { "pll5_clk", "c3_synth_gate_clk", };
-static const char *gmac_phy_input_parents[] = { "gmii_125m_pad_clk", "pll2_clk",
+	"uart1_syn_gclk", };
+static const char *c3_parents[] = { "pll5_clk", "c3_syn_gclk", };
+static const char *gmac_phy_input_parents[] = { "gmii_pad_clk", "pll2_clk",
 	"osc_25m_clk", };
-static const char *gmac_phy_parents[] = { "gmac_phy_input_mux_clk",
-	"gmac_phy_synth_gate_clk", };
+static const char *gmac_phy_parents[] = { "phy_input_mclk", "phy_syn_gclk", };
 static const char *clcd_synth_parents[] = { "vco1div4_clk", "pll2_clk", };
-static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_synth_clk", };
+static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_syn_clk", };
 static const char *i2s_src_parents[] = { "vco1div2_clk", "pll2_clk", "pll3_clk",
 	"i2s_src_pad_clk", };
-static const char *i2s_ref_parents[] = { "i2s_src_mux_clk", "i2s_prs1_clk", };
-static const char *spdif_out_parents[] = { "i2s_src_pad_clk", "gen_synth2_clk",
-};
-static const char *spdif_in_parents[] = { "pll2_clk", "gen_synth3_clk", };
+static const char *i2s_ref_parents[] = { "i2s_src_mclk", "i2s_prs1_clk", };
+static const char *spdif_out_parents[] = { "i2s_src_pad_clk", "gen_syn2_clk", };
+static const char *spdif_in_parents[] = { "pll2_clk", "gen_syn3_clk", };
 
 static const char *gen_synth0_1_parents[] = { "vco1div4_clk", "vco3div2_clk",
 	"pll3_clk", };
@@ -415,9 +413,9 @@ void __init spear1340_clk_init(void)
 			25000000);
 	clk_register_clkdev(clk, "osc_25m_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "gmii_125m_pad_clk", NULL,
-			CLK_IS_ROOT, 125000000);
-	clk_register_clkdev(clk, "gmii_125m_pad_clk", NULL);
+	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT,
+			125000000);
+	clk_register_clkdev(clk, "gmii_pad_clk", NULL);
 
 	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL,
 			CLK_IS_ROOT, 12288000);
@@ -431,35 +429,35 @@ void __init spear1340_clk_init(void)
 
 	/* clock derived from 24 or 25 MHz osc clk */
 	/* vco-pll */
-	clk = clk_register_mux(NULL, "vco1_mux_clk", vco_parents,
+	clk = clk_register_mux(NULL, "vco1_mclk", vco_parents,
 			ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG,
 			SPEAR1340_PLL1_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "vco1_mux_clk", NULL);
-	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mux_clk",
-			0, SPEAR1340_PLL1_CTR, SPEAR1340_PLL1_FRQ, pll_rtbl,
+	clk_register_clkdev(clk, "vco1_mclk", NULL);
+	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mclk", 0,
+			SPEAR1340_PLL1_CTR, SPEAR1340_PLL1_FRQ, pll_rtbl,
 			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);
 	clk_register_clkdev(clk, "vco1_clk", NULL);
 	clk_register_clkdev(clk1, "pll1_clk", NULL);
 
-	clk = clk_register_mux(NULL, "vco2_mux_clk", vco_parents,
+	clk = clk_register_mux(NULL, "vco2_mclk", vco_parents,
 			ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG,
 			SPEAR1340_PLL2_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "vco2_mux_clk", NULL);
-	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mux_clk",
-			0, SPEAR1340_PLL2_CTR, SPEAR1340_PLL2_FRQ, pll_rtbl,
+	clk_register_clkdev(clk, "vco2_mclk", NULL);
+	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mclk", 0,
+			SPEAR1340_PLL2_CTR, SPEAR1340_PLL2_FRQ, pll_rtbl,
 			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);
 	clk_register_clkdev(clk, "vco2_clk", NULL);
 	clk_register_clkdev(clk1, "pll2_clk", NULL);
 
-	clk = clk_register_mux(NULL, "vco3_mux_clk", vco_parents,
+	clk = clk_register_mux(NULL, "vco3_mclk", vco_parents,
 			ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG,
 			SPEAR1340_PLL3_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "vco3_mux_clk", NULL);
-	clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mux_clk",
-			0, SPEAR1340_PLL3_CTR, SPEAR1340_PLL3_FRQ, pll_rtbl,
+	clk_register_clkdev(clk, "vco3_mclk", NULL);
+	clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mclk", 0,
+			SPEAR1340_PLL3_CTR, SPEAR1340_PLL3_FRQ, pll_rtbl,
 			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);
 	clk_register_clkdev(clk, "vco3_clk", NULL);
 	clk_register_clkdev(clk1, "pll3_clk", NULL);
@@ -498,7 +496,7 @@ void __init spear1340_clk_init(void)
 	/* peripherals */
 	clk_register_fixed_factor(NULL, "thermal_clk", "osc_24m_clk", 0, 1,
 			128);
-	clk = clk_register_gate(NULL, "thermal_gate_clk", "thermal_clk", 0,
+	clk = clk_register_gate(NULL, "thermal_gclk", "thermal_clk", 0,
 			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_THSENS_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "spear_thermal");
@@ -509,23 +507,23 @@ void __init spear1340_clk_init(void)
 	clk_register_clkdev(clk, "ddr_clk", NULL);
 
 	/* clock derived from pll1 clk */
-	clk = clk_register_frac("sys_synth_clk", "vco1div2_clk", 0,
+	clk = clk_register_frac("sys_syn_clk", "vco1div2_clk", 0,
 			SPEAR1340_SYS_CLK_SYNT, sys_synth_rtbl,
 			ARRAY_SIZE(sys_synth_rtbl), &_lock);
-	clk_register_clkdev(clk, "sys_synth_clk", NULL);
+	clk_register_clkdev(clk, "sys_syn_clk", NULL);
 
-	clk = clk_register_frac("amba_synth_clk", "vco1div2_clk", 0,
+	clk = clk_register_frac("amba_syn_clk", "vco1div2_clk", 0,
 			SPEAR1340_AMBA_CLK_SYNT, amba_synth_rtbl,
 			ARRAY_SIZE(amba_synth_rtbl), &_lock);
-	clk_register_clkdev(clk, "amba_synth_clk", NULL);
+	clk_register_clkdev(clk, "amba_syn_clk", NULL);
 
-	clk = clk_register_mux(NULL, "sys_mux_clk", sys_parents,
+	clk = clk_register_mux(NULL, "sys_mclk", sys_parents,
 			ARRAY_SIZE(sys_parents), 0, SPEAR1340_SYS_CLK_CTRL,
 			SPEAR1340_SCLK_SRC_SEL_SHIFT,
 			SPEAR1340_SCLK_SRC_SEL_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "sys_clk", NULL);
 
-	clk = clk_register_fixed_factor(NULL, "cpu_clk", "sys_mux_clk", 0, 1,
+	clk = clk_register_fixed_factor(NULL, "cpu_clk", "sys_mclk", 0, 1,
 			2);
 	clk_register_clkdev(clk, "cpu_clk", NULL);
 
@@ -548,194 +546,193 @@ void __init spear1340_clk_init(void)
 	clk_register_clkdev(clk, "apb_clk", NULL);
 
 	/* gpt clocks */
-	clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt_parents,
+	clk = clk_register_mux(NULL, "gpt0_mclk", gpt_parents,
 			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,
 			SPEAR1340_GPT0_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "gpt0_mux_clk", NULL);
-	clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mux_clk", 0,
+	clk_register_clkdev(clk, "gpt0_mclk", NULL);
+	clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_GPT0_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "gpt0");
 
-	clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt_parents,
+	clk = clk_register_mux(NULL, "gpt1_mclk", gpt_parents,
 			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,
 			SPEAR1340_GPT1_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "gpt1_mux_clk", NULL);
-	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0,
+	clk_register_clkdev(clk, "gpt1_mclk", NULL);
+	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_GPT1_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "gpt1");
 
-	clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt_parents,
+	clk = clk_register_mux(NULL, "gpt2_mclk", gpt_parents,
 			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,
 			SPEAR1340_GPT2_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "gpt2_mux_clk", NULL);
-	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0,
+	clk_register_clkdev(clk, "gpt2_mclk", NULL);
+	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,
 			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_GPT2_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "gpt2");
 
-	clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt_parents,
+	clk = clk_register_mux(NULL, "gpt3_mclk", gpt_parents,
 			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,
 			SPEAR1340_GPT3_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "gpt3_mux_clk", NULL);
-	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0,
+	clk_register_clkdev(clk, "gpt3_mclk", NULL);
+	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0,
 			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_GPT3_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "gpt3");
 
 	/* others */
-	clk = clk_register_aux("uart0_synth_clk", "uart0_synth_gate_clk",
+	clk = clk_register_aux("uart0_syn_clk", "uart0_syn_gclk",
 			"vco1div2_clk", 0, SPEAR1340_UART0_CLK_SYNT, NULL,
 			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "uart0_synth_clk", NULL);
-	clk_register_clkdev(clk1, "uart0_synth_gate_clk", NULL);
+	clk_register_clkdev(clk, "uart0_syn_clk", NULL);
+	clk_register_clkdev(clk1, "uart0_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents,
+	clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents,
 			ARRAY_SIZE(uart0_parents), 0, SPEAR1340_PERIP_CLK_CFG,
 			SPEAR1340_UART0_CLK_SHIFT, SPEAR1340_UART_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "uart0_mux_clk", NULL);
+	clk_register_clkdev(clk, "uart0_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "uart0_clk", "uart0_mux_clk", 0,
+	clk = clk_register_gate(NULL, "uart0_clk", "uart0_mclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_UART0_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "e0000000.serial");
 
-	clk = clk_register_aux("uart1_synth_clk", "uart1_synth_gate_clk",
+	clk = clk_register_aux("uart1_syn_clk", "uart1_syn_gclk",
 			"vco1div2_clk", 0, SPEAR1340_UART1_CLK_SYNT, NULL,
 			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "uart1_synth_clk", NULL);
-	clk_register_clkdev(clk1, "uart1_synth_gate_clk", NULL);
+	clk_register_clkdev(clk, "uart1_syn_clk", NULL);
+	clk_register_clkdev(clk1, "uart1_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "uart1_mux_clk", uart1_parents,
+	clk = clk_register_mux(NULL, "uart1_mclk", uart1_parents,
 			ARRAY_SIZE(uart1_parents), 0, SPEAR1340_PERIP_CLK_CFG,
 			SPEAR1340_UART1_CLK_SHIFT, SPEAR1340_UART_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "uart1_mux_clk", NULL);
+	clk_register_clkdev(clk, "uart1_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mux_clk", 0,
+	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_UART1_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "b4100000.serial");
 
-	clk = clk_register_aux("sdhci_synth_clk", "sdhci_synth_gate_clk",
+	clk = clk_register_aux("sdhci_syn_clk", "sdhci_syn_gclk",
 			"vco1div2_clk", 0, SPEAR1340_SDHCI_CLK_SYNT, NULL,
 			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "sdhci_synth_clk", NULL);
-	clk_register_clkdev(clk1, "sdhci_synth_gate_clk", NULL);
+	clk_register_clkdev(clk, "sdhci_syn_clk", NULL);
+	clk_register_clkdev(clk1, "sdhci_syn_gclk", NULL);
 
-	clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_synth_gate_clk", 0,
+	clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_syn_gclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_SDHCI_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "b3000000.sdhci");
 
-	clk = clk_register_aux("cfxd_synth_clk", "cfxd_synth_gate_clk",
-			"vco1div2_clk", 0, SPEAR1340_CFXD_CLK_SYNT, NULL,
-			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "cfxd_synth_clk", NULL);
-	clk_register_clkdev(clk1, "cfxd_synth_gate_clk", NULL);
+	clk = clk_register_aux("cfxd_syn_clk", "cfxd_syn_gclk", "vco1div2_clk",
+			0, SPEAR1340_CFXD_CLK_SYNT, NULL, aux_rtbl,
+			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
+	clk_register_clkdev(clk, "cfxd_syn_clk", NULL);
+	clk_register_clkdev(clk1, "cfxd_syn_gclk", NULL);
 
-	clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_synth_gate_clk", 0,
+	clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_syn_gclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_CFXD_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "b2800000.cf");
 	clk_register_clkdev(clk, NULL, "arasan_xd");
 
-	clk = clk_register_aux("c3_synth_clk", "c3_synth_gate_clk",
-			"vco1div2_clk", 0, SPEAR1340_C3_CLK_SYNT, NULL,
-			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "c3_synth_clk", NULL);
-	clk_register_clkdev(clk1, "c3_synth_gate_clk", NULL);
+	clk = clk_register_aux("c3_syn_clk", "c3_syn_gclk", "vco1div2_clk", 0,
+			SPEAR1340_C3_CLK_SYNT, NULL, aux_rtbl,
+			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
+	clk_register_clkdev(clk, "c3_syn_clk", NULL);
+	clk_register_clkdev(clk1, "c3_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "c3_mux_clk", c3_parents,
+	clk = clk_register_mux(NULL, "c3_mclk", c3_parents,
 			ARRAY_SIZE(c3_parents), 0, SPEAR1340_PERIP_CLK_CFG,
 			SPEAR1340_C3_CLK_SHIFT, SPEAR1340_C3_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "c3_mux_clk", NULL);
+	clk_register_clkdev(clk, "c3_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "c3_clk", "c3_mux_clk", 0,
+	clk = clk_register_gate(NULL, "c3_clk", "c3_mclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_C3_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "c3");
 
 	/* gmac */
-	clk = clk_register_mux(NULL, "gmac_phy_input_mux_clk",
-			gmac_phy_input_parents,
+	clk = clk_register_mux(NULL, "phy_input_mclk", gmac_phy_input_parents,
 			ARRAY_SIZE(gmac_phy_input_parents), 0,
 			SPEAR1340_GMAC_CLK_CFG,
 			SPEAR1340_GMAC_PHY_INPUT_CLK_SHIFT,
 			SPEAR1340_GMAC_PHY_INPUT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gmac_phy_input_mux_clk", NULL);
+	clk_register_clkdev(clk, "phy_input_mclk", NULL);
 
-	clk = clk_register_aux("gmac_phy_synth_clk", "gmac_phy_synth_gate_clk",
-			"gmac_phy_input_mux_clk", 0, SPEAR1340_GMAC_CLK_SYNT,
-			NULL, gmac_rtbl, ARRAY_SIZE(gmac_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "gmac_phy_synth_clk", NULL);
-	clk_register_clkdev(clk1, "gmac_phy_synth_gate_clk", NULL);
+	clk = clk_register_aux("phy_syn_clk", "phy_syn_gclk", "phy_input_mclk",
+			0, SPEAR1340_GMAC_CLK_SYNT, NULL, gmac_rtbl,
+			ARRAY_SIZE(gmac_rtbl), &_lock, &clk1);
+	clk_register_clkdev(clk, "phy_syn_clk", NULL);
+	clk_register_clkdev(clk1, "phy_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "gmac_phy_mux_clk", gmac_phy_parents,
+	clk = clk_register_mux(NULL, "phy_mclk", gmac_phy_parents,
 			ARRAY_SIZE(gmac_phy_parents), 0,
 			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GMAC_PHY_CLK_SHIFT,
 			SPEAR1340_GMAC_PHY_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "stmmacphy.0");
 
 	/* clcd */
-	clk = clk_register_mux(NULL, "clcd_synth_mux_clk", clcd_synth_parents,
+	clk = clk_register_mux(NULL, "clcd_syn_mclk", clcd_synth_parents,
 			ARRAY_SIZE(clcd_synth_parents), 0,
 			SPEAR1340_CLCD_CLK_SYNT, SPEAR1340_CLCD_SYNT_CLK_SHIFT,
 			SPEAR1340_CLCD_SYNT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "clcd_synth_mux_clk", NULL);
+	clk_register_clkdev(clk, "clcd_syn_mclk", NULL);
 
-	clk = clk_register_frac("clcd_synth_clk", "clcd_synth_mux_clk", 0,
+	clk = clk_register_frac("clcd_syn_clk", "clcd_syn_mclk", 0,
 			SPEAR1340_CLCD_CLK_SYNT, clcd_rtbl,
 			ARRAY_SIZE(clcd_rtbl), &_lock);
-	clk_register_clkdev(clk, "clcd_synth_clk", NULL);
+	clk_register_clkdev(clk, "clcd_syn_clk", NULL);
 
-	clk = clk_register_mux(NULL, "clcd_pixel_mux_clk", clcd_pixel_parents,
+	clk = clk_register_mux(NULL, "clcd_pixel_mclk", clcd_pixel_parents,
 			ARRAY_SIZE(clcd_pixel_parents), 0,
 			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_CLCD_CLK_SHIFT,
 			SPEAR1340_CLCD_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "clcd_pixel_clk", NULL);
 
-	clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mux_clk", 0,
+	clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_CLCD_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, "clcd_clk", NULL);
 
 	/* i2s */
-	clk = clk_register_mux(NULL, "i2s_src_mux_clk", i2s_src_parents,
+	clk = clk_register_mux(NULL, "i2s_src_mclk", i2s_src_parents,
 			ARRAY_SIZE(i2s_src_parents), 0, SPEAR1340_I2S_CLK_CFG,
 			SPEAR1340_I2S_SRC_CLK_SHIFT, SPEAR1340_I2S_SRC_CLK_MASK,
 			0, &_lock);
 	clk_register_clkdev(clk, "i2s_src_clk", NULL);
 
-	clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mux_clk", 0,
+	clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mclk", 0,
 			SPEAR1340_I2S_CLK_CFG, &i2s_prs1_masks, i2s_prs1_rtbl,
 			ARRAY_SIZE(i2s_prs1_rtbl), &_lock, NULL);
 	clk_register_clkdev(clk, "i2s_prs1_clk", NULL);
 
-	clk = clk_register_mux(NULL, "i2s_ref_mux_clk", i2s_ref_parents,
+	clk = clk_register_mux(NULL, "i2s_ref_mclk", i2s_ref_parents,
 			ARRAY_SIZE(i2s_ref_parents), 0, SPEAR1340_I2S_CLK_CFG,
 			SPEAR1340_I2S_REF_SHIFT, SPEAR1340_I2S_REF_SEL_MASK, 0,
 			&_lock);
 	clk_register_clkdev(clk, "i2s_ref_clk", NULL);
 
-	clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mux_clk", 0,
+	clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mclk", 0,
 			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_I2S_REF_PAD_CLK_ENB,
 			0, &_lock);
 	clk_register_clkdev(clk, "i2s_ref_pad_clk", NULL);
 
-	clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gate_clk",
-			"i2s_ref_mux_clk", 0, SPEAR1340_I2S_CLK_CFG,
-			&i2s_sclk_masks, i2s_sclk_rtbl,
-			ARRAY_SIZE(i2s_sclk_rtbl), &_lock, &clk1);
+	clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gclk", "i2s_ref_mclk",
+			0, SPEAR1340_I2S_CLK_CFG, &i2s_sclk_masks,
+			i2s_sclk_rtbl, ARRAY_SIZE(i2s_sclk_rtbl), &_lock,
+			&clk1);
 	clk_register_clkdev(clk, "i2s_sclk_clk", NULL);
-	clk_register_clkdev(clk1, "i2s_sclk_gate_clk", NULL);
+	clk_register_clkdev(clk1, "i2s_sclk_gclk", NULL);
 
 	/* clock derived from ahb clk */
 	clk = clk_register_gate(NULL, "i2c0_clk", "ahb_clk", 0,
@@ -800,13 +797,13 @@ void __init spear1340_clk_init(void)
 			&_lock);
 	clk_register_clkdev(clk, "sysram1_clk", NULL);
 
-	clk = clk_register_aux("adc_synth_clk", "adc_synth_gate_clk", "ahb_clk",
+	clk = clk_register_aux("adc_syn_clk", "adc_syn_gclk", "ahb_clk",
 			0, SPEAR1340_ADC_CLK_SYNT, NULL, adc_rtbl,
 			ARRAY_SIZE(adc_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "adc_synth_clk", NULL);
-	clk_register_clkdev(clk1, "adc_synth_gate_clk", NULL);
+	clk_register_clkdev(clk, "adc_syn_clk", NULL);
+	clk_register_clkdev(clk1, "adc_syn_gclk", NULL);
 
-	clk = clk_register_gate(NULL, "adc_clk", "adc_synth_gate_clk", 0,
+	clk = clk_register_gate(NULL, "adc_clk", "adc_syn_gclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_ADC_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "adc_clk");
@@ -843,39 +840,39 @@ void __init spear1340_clk_init(void)
 	clk_register_clkdev(clk, NULL, "e0300000.kbd");
 
 	/* RAS clks */
-	clk = clk_register_mux(NULL, "gen_synth0_1_mux_clk",
-			gen_synth0_1_parents, ARRAY_SIZE(gen_synth0_1_parents),
-			0, SPEAR1340_PLL_CFG, SPEAR1340_GEN_SYNT0_1_CLK_SHIFT,
+	clk = clk_register_mux(NULL, "gen_syn0_1_mclk", gen_synth0_1_parents,
+			ARRAY_SIZE(gen_synth0_1_parents), 0, SPEAR1340_PLL_CFG,
+			SPEAR1340_GEN_SYNT0_1_CLK_SHIFT,
 			SPEAR1340_GEN_SYNT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gen_synth0_1_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn0_1_clk", NULL);
 
-	clk = clk_register_mux(NULL, "gen_synth2_3_mux_clk",
-			gen_synth2_3_parents, ARRAY_SIZE(gen_synth2_3_parents),
-			0, SPEAR1340_PLL_CFG, SPEAR1340_GEN_SYNT2_3_CLK_SHIFT,
+	clk = clk_register_mux(NULL, "gen_syn2_3_mclk", gen_synth2_3_parents,
+			ARRAY_SIZE(gen_synth2_3_parents), 0, SPEAR1340_PLL_CFG,
+			SPEAR1340_GEN_SYNT2_3_CLK_SHIFT,
 			SPEAR1340_GEN_SYNT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gen_synth2_3_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn2_3_clk", NULL);
 
-	clk = clk_register_frac("gen_synth0_clk", "gen_synth0_1_clk", 0,
+	clk = clk_register_frac("gen_syn0_clk", "gen_syn0_1_clk", 0,
 			SPEAR1340_GEN_CLK_SYNT0, gen_rtbl, ARRAY_SIZE(gen_rtbl),
 			&_lock);
-	clk_register_clkdev(clk, "gen_synth0_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn0_clk", NULL);
 
-	clk = clk_register_frac("gen_synth1_clk", "gen_synth0_1_clk", 0,
+	clk = clk_register_frac("gen_syn1_clk", "gen_syn0_1_clk", 0,
 			SPEAR1340_GEN_CLK_SYNT1, gen_rtbl, ARRAY_SIZE(gen_rtbl),
 			&_lock);
-	clk_register_clkdev(clk, "gen_synth1_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn1_clk", NULL);
 
-	clk = clk_register_frac("gen_synth2_clk", "gen_synth2_3_clk", 0,
+	clk = clk_register_frac("gen_syn2_clk", "gen_syn2_3_clk", 0,
 			SPEAR1340_GEN_CLK_SYNT2, gen_rtbl, ARRAY_SIZE(gen_rtbl),
 			&_lock);
-	clk_register_clkdev(clk, "gen_synth2_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn2_clk", NULL);
 
-	clk = clk_register_frac("gen_synth3_clk", "gen_synth2_3_clk", 0,
+	clk = clk_register_frac("gen_syn3_clk", "gen_syn2_3_clk", 0,
 			SPEAR1340_GEN_CLK_SYNT3, gen_rtbl, ARRAY_SIZE(gen_rtbl),
 			&_lock);
-	clk_register_clkdev(clk, "gen_synth3_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn3_clk", NULL);
 
-	clk = clk_register_gate(NULL, "mali_clk", "gen_synth3_clk", 0,
+	clk = clk_register_gate(NULL, "mali_clk", "gen_syn3_clk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_MALI_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "mali");
@@ -890,74 +887,74 @@ void __init spear1340_clk_init(void)
 			&_lock);
 	clk_register_clkdev(clk, NULL, "spear_cec.1");
 
-	clk = clk_register_mux(NULL, "spdif_out_mux_clk", spdif_out_parents,
+	clk = clk_register_mux(NULL, "spdif_out_mclk", spdif_out_parents,
 			ARRAY_SIZE(spdif_out_parents), 0,
 			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_SPDIF_OUT_CLK_SHIFT,
 			SPEAR1340_SPDIF_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "spdif_out_mux_clk", NULL);
+	clk_register_clkdev(clk, "spdif_out_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "spdif_out_clk", "spdif_out_mux_clk", 0,
+	clk = clk_register_gate(NULL, "spdif_out_clk", "spdif_out_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_SPDIF_OUT_CLK_ENB,
 			0, &_lock);
 	clk_register_clkdev(clk, NULL, "spdif-out");
 
-	clk = clk_register_mux(NULL, "spdif_in_mux_clk", spdif_in_parents,
+	clk = clk_register_mux(NULL, "spdif_in_mclk", spdif_in_parents,
 			ARRAY_SIZE(spdif_in_parents), 0,
 			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_SPDIF_IN_CLK_SHIFT,
 			SPEAR1340_SPDIF_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "spdif_in_mux_clk", NULL);
+	clk_register_clkdev(clk, "spdif_in_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "spdif_in_clk", "spdif_in_mux_clk", 0,
+	clk = clk_register_gate(NULL, "spdif_in_clk", "spdif_in_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_SPDIF_IN_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "spdif-in");
 
-	clk = clk_register_gate(NULL, "acp_clk", "acp_mux_clk", 0,
+	clk = clk_register_gate(NULL, "acp_clk", "acp_mclk", 0,
 			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_ACP_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "acp_clk");
 
-	clk = clk_register_gate(NULL, "plgpio_clk", "plgpio_mux_clk", 0,
+	clk = clk_register_gate(NULL, "plgpio_clk", "plgpio_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_PLGPIO_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "plgpio");
 
-	clk = clk_register_gate(NULL, "video_dec_clk", "video_dec_mux_clk", 0,
+	clk = clk_register_gate(NULL, "video_dec_clk", "video_dec_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_DEC_CLK_ENB,
 			0, &_lock);
 	clk_register_clkdev(clk, NULL, "video_dec");
 
-	clk = clk_register_gate(NULL, "video_enc_clk", "video_enc_mux_clk", 0,
+	clk = clk_register_gate(NULL, "video_enc_clk", "video_enc_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_ENC_CLK_ENB,
 			0, &_lock);
 	clk_register_clkdev(clk, NULL, "video_enc");
 
-	clk = clk_register_gate(NULL, "video_in_clk", "video_in_mux_clk", 0,
+	clk = clk_register_gate(NULL, "video_in_clk", "video_in_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_IN_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "spear_vip");
 
-	clk = clk_register_gate(NULL, "cam0_clk", "cam0_mux_clk", 0,
+	clk = clk_register_gate(NULL, "cam0_clk", "cam0_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM0_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "spear_camif.0");
 
-	clk = clk_register_gate(NULL, "cam1_clk", "cam1_mux_clk", 0,
+	clk = clk_register_gate(NULL, "cam1_clk", "cam1_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM1_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "spear_camif.1");
 
-	clk = clk_register_gate(NULL, "cam2_clk", "cam2_mux_clk", 0,
+	clk = clk_register_gate(NULL, "cam2_clk", "cam2_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM2_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "spear_camif.2");
 
-	clk = clk_register_gate(NULL, "cam3_clk", "cam3_mux_clk", 0,
+	clk = clk_register_gate(NULL, "cam3_clk", "cam3_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM3_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "spear_camif.3");
 
-	clk = clk_register_gate(NULL, "pwm_clk", "pwm_mux_clk", 0,
+	clk = clk_register_gate(NULL, "pwm_clk", "pwm_mclk", 0,
 			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_PWM_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "pwm");
-- 
cgit v1.2.3


From e28f1aa110c919716188b979c4404e4c8e9794b9 Mon Sep 17 00:00:00 2001
From: Vipul Kumar Samar <vipulkumar.samar@st.com>
Date: Tue, 10 Jul 2012 17:12:44 +0530
Subject: clk:spear1310:Fix: Rename clk ids within predefined limit

The max limit of con_id is 16 and dev_id is 20. As of now for spear1310, many
clk ids are exceeding this predefined limit.

This patch is intended to rename clk ids like:
    mux_clk -> _mclk
    gate_clk -> _gclk
    synth_clk -> syn_clk
    gmac_phy -> phy_
    gmii_125m_pad -> gmii_pad

Signed-off-by: Vipul Kumar Samar <vipulkumar.samar@st.com>
Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/clk/spear/spear1310_clock.c | 312 ++++++++++++++++++------------------
 1 file changed, 155 insertions(+), 157 deletions(-)

diff --git a/drivers/clk/spear/spear1310_clock.c b/drivers/clk/spear/spear1310_clock.c
index 8f05652d53e6..0fcec2aae19c 100644
--- a/drivers/clk/spear/spear1310_clock.c
+++ b/drivers/clk/spear/spear1310_clock.c
@@ -345,31 +345,30 @@ static struct frac_rate_tbl gen_rtbl[] = {
 /* clock parents */
 static const char *vco_parents[] = { "osc_24m_clk", "osc_25m_clk", };
 static const char *gpt_parents[] = { "osc_24m_clk", "apb_clk", };
-static const char *uart0_parents[] = { "pll5_clk", "uart_synth_gate_clk", };
-static const char *c3_parents[] = { "pll5_clk", "c3_synth_gate_clk", };
-static const char *gmac_phy_input_parents[] = { "gmii_125m_pad_clk", "pll2_clk",
+static const char *uart0_parents[] = { "pll5_clk", "uart_syn_gclk", };
+static const char *c3_parents[] = { "pll5_clk", "c3_syn_gclk", };
+static const char *gmac_phy_input_parents[] = { "gmii_pad_clk", "pll2_clk",
 	"osc_25m_clk", };
-static const char *gmac_phy_parents[] = { "gmac_phy_input_mux_clk",
-	"gmac_phy_synth_gate_clk", };
+static const char *gmac_phy_parents[] = { "phy_input_mclk", "phy_syn_gclk", };
 static const char *clcd_synth_parents[] = { "vco1div4_clk", "pll2_clk", };
-static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_synth_clk", };
+static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_syn_clk", };
 static const char *i2s_src_parents[] = { "vco1div2_clk", "none", "pll3_clk",
 	"i2s_src_pad_clk", };
-static const char *i2s_ref_parents[] = { "i2s_src_mux_clk", "i2s_prs1_clk", };
+static const char *i2s_ref_parents[] = { "i2s_src_mclk", "i2s_prs1_clk", };
 static const char *gen_synth0_1_parents[] = { "vco1div4_clk", "vco3div2_clk",
 	"pll3_clk", };
 static const char *gen_synth2_3_parents[] = { "vco1div4_clk", "vco3div2_clk",
 	"pll2_clk", };
 static const char *rmii_phy_parents[] = { "ras_tx50_clk", "none",
-	"ras_pll2_clk", "ras_synth0_clk", };
+	"ras_pll2_clk", "ras_syn0_clk", };
 static const char *smii_rgmii_phy_parents[] = { "none", "ras_tx125_clk",
-	"ras_pll2_clk", "ras_synth0_clk", };
-static const char *uart_parents[] = { "ras_apb_clk", "gen_synth3_clk", };
-static const char *i2c_parents[] = { "ras_apb_clk", "gen_synth1_clk", };
-static const char *ssp1_parents[] = { "ras_apb_clk", "gen_synth1_clk",
+	"ras_pll2_clk", "ras_syn0_clk", };
+static const char *uart_parents[] = { "ras_apb_clk", "gen_syn3_clk", };
+static const char *i2c_parents[] = { "ras_apb_clk", "gen_syn1_clk", };
+static const char *ssp1_parents[] = { "ras_apb_clk", "gen_syn1_clk",
 	"ras_plclk0_clk", };
-static const char *pci_parents[] = { "ras_pll3_clk", "gen_synth2_clk", };
-static const char *tdm_parents[] = { "ras_pll3_clk", "gen_synth1_clk", };
+static const char *pci_parents[] = { "ras_pll3_clk", "gen_syn2_clk", };
+static const char *tdm_parents[] = { "ras_pll3_clk", "gen_syn1_clk", };
 
 void __init spear1310_clk_init(void)
 {
@@ -390,9 +389,9 @@ void __init spear1310_clk_init(void)
 			25000000);
 	clk_register_clkdev(clk, "osc_25m_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "gmii_125m_pad_clk", NULL,
-			CLK_IS_ROOT, 125000000);
-	clk_register_clkdev(clk, "gmii_125m_pad_clk", NULL);
+	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT,
+			125000000);
+	clk_register_clkdev(clk, "gmii_pad_clk", NULL);
 
 	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL,
 			CLK_IS_ROOT, 12288000);
@@ -406,34 +405,34 @@ void __init spear1310_clk_init(void)
 
 	/* clock derived from 24 or 25 MHz osc clk */
 	/* vco-pll */
-	clk = clk_register_mux(NULL, "vco1_mux_clk", vco_parents,
+	clk = clk_register_mux(NULL, "vco1_mclk", vco_parents,
 			ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG,
 			SPEAR1310_PLL1_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "vco1_mux_clk", NULL);
-	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mux_clk",
+	clk_register_clkdev(clk, "vco1_mclk", NULL);
+	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mclk",
 			0, SPEAR1310_PLL1_CTR, SPEAR1310_PLL1_FRQ, pll_rtbl,
 			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);
 	clk_register_clkdev(clk, "vco1_clk", NULL);
 	clk_register_clkdev(clk1, "pll1_clk", NULL);
 
-	clk = clk_register_mux(NULL, "vco2_mux_clk", vco_parents,
+	clk = clk_register_mux(NULL, "vco2_mclk", vco_parents,
 			ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG,
 			SPEAR1310_PLL2_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "vco2_mux_clk", NULL);
-	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mux_clk",
+	clk_register_clkdev(clk, "vco2_mclk", NULL);
+	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mclk",
 			0, SPEAR1310_PLL2_CTR, SPEAR1310_PLL2_FRQ, pll_rtbl,
 			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);
 	clk_register_clkdev(clk, "vco2_clk", NULL);
 	clk_register_clkdev(clk1, "pll2_clk", NULL);
 
-	clk = clk_register_mux(NULL, "vco3_mux_clk", vco_parents,
+	clk = clk_register_mux(NULL, "vco3_mclk", vco_parents,
 			ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG,
 			SPEAR1310_PLL3_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "vco3_mux_clk", NULL);
-	clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mux_clk",
+	clk_register_clkdev(clk, "vco3_mclk", NULL);
+	clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mclk",
 			0, SPEAR1310_PLL3_CTR, SPEAR1310_PLL3_FRQ, pll_rtbl,
 			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);
 	clk_register_clkdev(clk, "vco3_clk", NULL);
@@ -473,7 +472,7 @@ void __init spear1310_clk_init(void)
 	/* peripherals */
 	clk_register_fixed_factor(NULL, "thermal_clk", "osc_24m_clk", 0, 1,
 			128);
-	clk = clk_register_gate(NULL, "thermal_gate_clk", "thermal_clk", 0,
+	clk = clk_register_gate(NULL, "thermal_gclk", "thermal_clk", 0,
 			SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_THSENS_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "spear_thermal");
@@ -500,177 +499,176 @@ void __init spear1310_clk_init(void)
 	clk_register_clkdev(clk, "apb_clk", NULL);
 
 	/* gpt clocks */
-	clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt_parents,
+	clk = clk_register_mux(NULL, "gpt0_mclk", gpt_parents,
 			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,
 			SPEAR1310_GPT0_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "gpt0_mux_clk", NULL);
-	clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mux_clk", 0,
+	clk_register_clkdev(clk, "gpt0_mclk", NULL);
+	clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mclk", 0,
 			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_GPT0_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "gpt0");
 
-	clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt_parents,
+	clk = clk_register_mux(NULL, "gpt1_mclk", gpt_parents,
 			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,
 			SPEAR1310_GPT1_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "gpt1_mux_clk", NULL);
-	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0,
+	clk_register_clkdev(clk, "gpt1_mclk", NULL);
+	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,
 			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_GPT1_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "gpt1");
 
-	clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt_parents,
+	clk = clk_register_mux(NULL, "gpt2_mclk", gpt_parents,
 			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,
 			SPEAR1310_GPT2_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "gpt2_mux_clk", NULL);
-	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0,
+	clk_register_clkdev(clk, "gpt2_mclk", NULL);
+	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,
 			SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_GPT2_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "gpt2");
 
-	clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt_parents,
+	clk = clk_register_mux(NULL, "gpt3_mclk", gpt_parents,
 			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,
 			SPEAR1310_GPT3_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "gpt3_mux_clk", NULL);
-	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0,
+	clk_register_clkdev(clk, "gpt3_mclk", NULL);
+	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0,
 			SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_GPT3_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "gpt3");
 
 	/* others */
-	clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk",
-			"vco1div2_clk", 0, SPEAR1310_UART_CLK_SYNT, NULL,
-			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "uart_synth_clk", NULL);
-	clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL);
+	clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "vco1div2_clk",
+			0, SPEAR1310_UART_CLK_SYNT, NULL, aux_rtbl,
+			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
+	clk_register_clkdev(clk, "uart_syn_clk", NULL);
+	clk_register_clkdev(clk1, "uart_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents,
+	clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents,
 			ARRAY_SIZE(uart0_parents), 0, SPEAR1310_PERIP_CLK_CFG,
 			SPEAR1310_UART_CLK_SHIFT, SPEAR1310_UART_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "uart0_mux_clk", NULL);
+	clk_register_clkdev(clk, "uart0_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "uart0_clk", "uart0_mux_clk", 0,
+	clk = clk_register_gate(NULL, "uart0_clk", "uart0_mclk", 0,
 			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_UART_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "e0000000.serial");
 
-	clk = clk_register_aux("sdhci_synth_clk", "sdhci_synth_gate_clk",
+	clk = clk_register_aux("sdhci_syn_clk", "sdhci_syn_gclk",
 			"vco1div2_clk", 0, SPEAR1310_SDHCI_CLK_SYNT, NULL,
 			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "sdhci_synth_clk", NULL);
-	clk_register_clkdev(clk1, "sdhci_synth_gate_clk", NULL);
+	clk_register_clkdev(clk, "sdhci_syn_clk", NULL);
+	clk_register_clkdev(clk1, "sdhci_syn_gclk", NULL);
 
-	clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_synth_gate_clk", 0,
+	clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_syn_gclk", 0,
 			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_SDHCI_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "b3000000.sdhci");
 
-	clk = clk_register_aux("cfxd_synth_clk", "cfxd_synth_gate_clk",
-			"vco1div2_clk", 0, SPEAR1310_CFXD_CLK_SYNT, NULL,
-			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "cfxd_synth_clk", NULL);
-	clk_register_clkdev(clk1, "cfxd_synth_gate_clk", NULL);
+	clk = clk_register_aux("cfxd_syn_clk", "cfxd_syn_gclk", "vco1div2_clk",
+			0, SPEAR1310_CFXD_CLK_SYNT, NULL, aux_rtbl,
+			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
+	clk_register_clkdev(clk, "cfxd_syn_clk", NULL);
+	clk_register_clkdev(clk1, "cfxd_syn_gclk", NULL);
 
-	clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_synth_gate_clk", 0,
+	clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_syn_gclk", 0,
 			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_CFXD_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "b2800000.cf");
 	clk_register_clkdev(clk, NULL, "arasan_xd");
 
-	clk = clk_register_aux("c3_synth_clk", "c3_synth_gate_clk",
-			"vco1div2_clk", 0, SPEAR1310_C3_CLK_SYNT, NULL,
-			aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "c3_synth_clk", NULL);
-	clk_register_clkdev(clk1, "c3_synth_gate_clk", NULL);
+	clk = clk_register_aux("c3_syn_clk", "c3_syn_gclk", "vco1div2_clk",
+			0, SPEAR1310_C3_CLK_SYNT, NULL, aux_rtbl,
+			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
+	clk_register_clkdev(clk, "c3_syn_clk", NULL);
+	clk_register_clkdev(clk1, "c3_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "c3_mux_clk", c3_parents,
+	clk = clk_register_mux(NULL, "c3_mclk", c3_parents,
 			ARRAY_SIZE(c3_parents), 0, SPEAR1310_PERIP_CLK_CFG,
 			SPEAR1310_C3_CLK_SHIFT, SPEAR1310_C3_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "c3_mux_clk", NULL);
+	clk_register_clkdev(clk, "c3_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "c3_clk", "c3_mux_clk", 0,
+	clk = clk_register_gate(NULL, "c3_clk", "c3_mclk", 0,
 			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_C3_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "c3");
 
 	/* gmac */
-	clk = clk_register_mux(NULL, "gmac_phy_input_mux_clk",
-			gmac_phy_input_parents,
+	clk = clk_register_mux(NULL, "phy_input_mclk", gmac_phy_input_parents,
 			ARRAY_SIZE(gmac_phy_input_parents), 0,
 			SPEAR1310_GMAC_CLK_CFG,
 			SPEAR1310_GMAC_PHY_INPUT_CLK_SHIFT,
 			SPEAR1310_GMAC_PHY_INPUT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gmac_phy_input_mux_clk", NULL);
+	clk_register_clkdev(clk, "phy_input_mclk", NULL);
 
-	clk = clk_register_aux("gmac_phy_synth_clk", "gmac_phy_synth_gate_clk",
-			"gmac_phy_input_mux_clk", 0, SPEAR1310_GMAC_CLK_SYNT,
-			NULL, gmac_rtbl, ARRAY_SIZE(gmac_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "gmac_phy_synth_clk", NULL);
-	clk_register_clkdev(clk1, "gmac_phy_synth_gate_clk", NULL);
+	clk = clk_register_aux("phy_syn_clk", "phy_syn_gclk", "phy_input_mclk",
+			0, SPEAR1310_GMAC_CLK_SYNT, NULL, gmac_rtbl,
+			ARRAY_SIZE(gmac_rtbl), &_lock, &clk1);
+	clk_register_clkdev(clk, "phy_syn_clk", NULL);
+	clk_register_clkdev(clk1, "phy_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "gmac_phy_mux_clk", gmac_phy_parents,
+	clk = clk_register_mux(NULL, "phy_mclk", gmac_phy_parents,
 			ARRAY_SIZE(gmac_phy_parents), 0,
 			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GMAC_PHY_CLK_SHIFT,
 			SPEAR1310_GMAC_PHY_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "stmmacphy.0");
 
 	/* clcd */
-	clk = clk_register_mux(NULL, "clcd_synth_mux_clk", clcd_synth_parents,
+	clk = clk_register_mux(NULL, "clcd_syn_mclk", clcd_synth_parents,
 			ARRAY_SIZE(clcd_synth_parents), 0,
 			SPEAR1310_CLCD_CLK_SYNT, SPEAR1310_CLCD_SYNT_CLK_SHIFT,
 			SPEAR1310_CLCD_SYNT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "clcd_synth_mux_clk", NULL);
+	clk_register_clkdev(clk, "clcd_syn_mclk", NULL);
 
-	clk = clk_register_frac("clcd_synth_clk", "clcd_synth_mux_clk", 0,
+	clk = clk_register_frac("clcd_syn_clk", "clcd_syn_mclk", 0,
 			SPEAR1310_CLCD_CLK_SYNT, clcd_rtbl,
 			ARRAY_SIZE(clcd_rtbl), &_lock);
-	clk_register_clkdev(clk, "clcd_synth_clk", NULL);
+	clk_register_clkdev(clk, "clcd_syn_clk", NULL);
 
-	clk = clk_register_mux(NULL, "clcd_pixel_mux_clk", clcd_pixel_parents,
+	clk = clk_register_mux(NULL, "clcd_pixel_mclk", clcd_pixel_parents,
 			ARRAY_SIZE(clcd_pixel_parents), 0,
 			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_CLCD_CLK_SHIFT,
 			SPEAR1310_CLCD_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "clcd_pixel_clk", NULL);
 
-	clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mux_clk", 0,
+	clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mclk", 0,
 			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_CLCD_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, "clcd_clk", NULL);
 
 	/* i2s */
-	clk = clk_register_mux(NULL, "i2s_src_mux_clk", i2s_src_parents,
+	clk = clk_register_mux(NULL, "i2s_src_mclk", i2s_src_parents,
 			ARRAY_SIZE(i2s_src_parents), 0, SPEAR1310_I2S_CLK_CFG,
 			SPEAR1310_I2S_SRC_CLK_SHIFT, SPEAR1310_I2S_SRC_CLK_MASK,
 			0, &_lock);
 	clk_register_clkdev(clk, "i2s_src_clk", NULL);
 
-	clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mux_clk", 0,
+	clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mclk", 0,
 			SPEAR1310_I2S_CLK_CFG, &i2s_prs1_masks, i2s_prs1_rtbl,
 			ARRAY_SIZE(i2s_prs1_rtbl), &_lock, NULL);
 	clk_register_clkdev(clk, "i2s_prs1_clk", NULL);
 
-	clk = clk_register_mux(NULL, "i2s_ref_mux_clk", i2s_ref_parents,
+	clk = clk_register_mux(NULL, "i2s_ref_mclk", i2s_ref_parents,
 			ARRAY_SIZE(i2s_ref_parents), 0, SPEAR1310_I2S_CLK_CFG,
 			SPEAR1310_I2S_REF_SHIFT, SPEAR1310_I2S_REF_SEL_MASK, 0,
 			&_lock);
 	clk_register_clkdev(clk, "i2s_ref_clk", NULL);
 
-	clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mux_clk", 0,
+	clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mclk", 0,
 			SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_I2S_REF_PAD_CLK_ENB,
 			0, &_lock);
 	clk_register_clkdev(clk, "i2s_ref_pad_clk", NULL);
 
-	clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gate_clk",
+	clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gclk",
 			"i2s_ref_pad_clk", 0, SPEAR1310_I2S_CLK_CFG,
 			&i2s_sclk_masks, i2s_sclk_rtbl,
 			ARRAY_SIZE(i2s_sclk_rtbl), &_lock, &clk1);
 	clk_register_clkdev(clk, "i2s_sclk_clk", NULL);
-	clk_register_clkdev(clk1, "i2s_sclk_gate_clk", NULL);
+	clk_register_clkdev(clk1, "i2s_sclk_gclk", NULL);
 
 	/* clock derived from ahb clk */
 	clk = clk_register_gate(NULL, "i2c0_clk", "ahb_clk", 0,
@@ -747,13 +745,13 @@ void __init spear1310_clk_init(void)
 			&_lock);
 	clk_register_clkdev(clk, "sysram1_clk", NULL);
 
-	clk = clk_register_aux("adc_synth_clk", "adc_synth_gate_clk", "ahb_clk",
+	clk = clk_register_aux("adc_syn_clk", "adc_syn_gclk", "ahb_clk",
 			0, SPEAR1310_ADC_CLK_SYNT, NULL, adc_rtbl,
 			ARRAY_SIZE(adc_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "adc_synth_clk", NULL);
-	clk_register_clkdev(clk1, "adc_synth_gate_clk", NULL);
+	clk_register_clkdev(clk, "adc_syn_clk", NULL);
+	clk_register_clkdev(clk1, "adc_syn_gclk", NULL);
 
-	clk = clk_register_gate(NULL, "adc_clk", "adc_synth_gate_clk", 0,
+	clk = clk_register_gate(NULL, "adc_clk", "adc_syn_gclk", 0,
 			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_ADC_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "adc_clk");
@@ -790,37 +788,37 @@ void __init spear1310_clk_init(void)
 	clk_register_clkdev(clk, NULL, "e0300000.kbd");
 
 	/* RAS clks */
-	clk = clk_register_mux(NULL, "gen_synth0_1_mux_clk",
-			gen_synth0_1_parents, ARRAY_SIZE(gen_synth0_1_parents),
-			0, SPEAR1310_PLL_CFG, SPEAR1310_RAS_SYNT0_1_CLK_SHIFT,
+	clk = clk_register_mux(NULL, "gen_syn0_1_mclk", gen_synth0_1_parents,
+			ARRAY_SIZE(gen_synth0_1_parents), 0, SPEAR1310_PLL_CFG,
+			SPEAR1310_RAS_SYNT0_1_CLK_SHIFT,
 			SPEAR1310_RAS_SYNT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gen_synth0_1_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn0_1_clk", NULL);
 
-	clk = clk_register_mux(NULL, "gen_synth2_3_mux_clk",
-			gen_synth2_3_parents, ARRAY_SIZE(gen_synth2_3_parents),
-			0, SPEAR1310_PLL_CFG, SPEAR1310_RAS_SYNT2_3_CLK_SHIFT,
+	clk = clk_register_mux(NULL, "gen_syn2_3_mclk", gen_synth2_3_parents,
+			ARRAY_SIZE(gen_synth2_3_parents), 0, SPEAR1310_PLL_CFG,
+			SPEAR1310_RAS_SYNT2_3_CLK_SHIFT,
 			SPEAR1310_RAS_SYNT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gen_synth2_3_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn2_3_clk", NULL);
 
-	clk = clk_register_frac("gen_synth0_clk", "gen_synth0_1_clk", 0,
+	clk = clk_register_frac("gen_syn0_clk", "gen_syn0_1_clk", 0,
 			SPEAR1310_RAS_CLK_SYNT0, gen_rtbl, ARRAY_SIZE(gen_rtbl),
 			&_lock);
-	clk_register_clkdev(clk, "gen_synth0_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn0_clk", NULL);
 
-	clk = clk_register_frac("gen_synth1_clk", "gen_synth0_1_clk", 0,
+	clk = clk_register_frac("gen_syn1_clk", "gen_syn0_1_clk", 0,
 			SPEAR1310_RAS_CLK_SYNT1, gen_rtbl, ARRAY_SIZE(gen_rtbl),
 			&_lock);
-	clk_register_clkdev(clk, "gen_synth1_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn1_clk", NULL);
 
-	clk = clk_register_frac("gen_synth2_clk", "gen_synth2_3_clk", 0,
+	clk = clk_register_frac("gen_syn2_clk", "gen_syn2_3_clk", 0,
 			SPEAR1310_RAS_CLK_SYNT2, gen_rtbl, ARRAY_SIZE(gen_rtbl),
 			&_lock);
-	clk_register_clkdev(clk, "gen_synth2_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn2_clk", NULL);
 
-	clk = clk_register_frac("gen_synth3_clk", "gen_synth2_3_clk", 0,
+	clk = clk_register_frac("gen_syn3_clk", "gen_syn2_3_clk", 0,
 			SPEAR1310_RAS_CLK_SYNT3, gen_rtbl, ARRAY_SIZE(gen_rtbl),
 			&_lock);
-	clk_register_clkdev(clk, "gen_synth3_clk", NULL);
+	clk_register_clkdev(clk, "gen_syn3_clk", NULL);
 
 	clk = clk_register_gate(NULL, "ras_osc_24m_clk", "osc_24m_clk", 0,
 			SPEAR1310_RAS_CLK_ENB, SPEAR1310_OSC_24M_CLK_ENB, 0,
@@ -847,7 +845,7 @@ void __init spear1310_clk_init(void)
 			&_lock);
 	clk_register_clkdev(clk, "ras_pll3_clk", NULL);
 
-	clk = clk_register_gate(NULL, "ras_tx125_clk", "gmii_125m_pad_clk", 0,
+	clk = clk_register_gate(NULL, "ras_tx125_clk", "gmii_pad_clk", 0,
 			SPEAR1310_RAS_CLK_ENB, SPEAR1310_C125M_PAD_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, "ras_tx125_clk", NULL);
@@ -912,7 +910,7 @@ void __init spear1310_clk_init(void)
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5c700000.eth");
 
-	clk = clk_register_mux(NULL, "smii_rgmii_phy_mux_clk",
+	clk = clk_register_mux(NULL, "smii_rgmii_phy_mclk",
 			smii_rgmii_phy_parents,
 			ARRAY_SIZE(smii_rgmii_phy_parents), 0,
 			SPEAR1310_RAS_CTRL_REG1,
@@ -922,184 +920,184 @@ void __init spear1310_clk_init(void)
 	clk_register_clkdev(clk, NULL, "stmmacphy.2");
 	clk_register_clkdev(clk, NULL, "stmmacphy.4");
 
-	clk = clk_register_mux(NULL, "rmii_phy_mux_clk", rmii_phy_parents,
+	clk = clk_register_mux(NULL, "rmii_phy_mclk", rmii_phy_parents,
 			ARRAY_SIZE(rmii_phy_parents), 0,
 			SPEAR1310_RAS_CTRL_REG1, SPEAR1310_RMII_PHY_CLK_SHIFT,
 			SPEAR1310_PHY_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "stmmacphy.3");
 
-	clk = clk_register_mux(NULL, "uart1_mux_clk", uart_parents,
+	clk = clk_register_mux(NULL, "uart1_mclk", uart_parents,
 			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_UART1_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,
 			0, &_lock);
-	clk_register_clkdev(clk, "uart1_mux_clk", NULL);
+	clk_register_clkdev(clk, "uart1_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mux_clk", 0,
+	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART1_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5c800000.serial");
 
-	clk = clk_register_mux(NULL, "uart2_mux_clk", uart_parents,
+	clk = clk_register_mux(NULL, "uart2_mclk", uart_parents,
 			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_UART2_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,
 			0, &_lock);
-	clk_register_clkdev(clk, "uart2_mux_clk", NULL);
+	clk_register_clkdev(clk, "uart2_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "uart2_clk", "uart2_mux_clk", 0,
+	clk = clk_register_gate(NULL, "uart2_clk", "uart2_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART2_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5c900000.serial");
 
-	clk = clk_register_mux(NULL, "uart3_mux_clk", uart_parents,
+	clk = clk_register_mux(NULL, "uart3_mclk", uart_parents,
 			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_UART3_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,
 			0, &_lock);
-	clk_register_clkdev(clk, "uart3_mux_clk", NULL);
+	clk_register_clkdev(clk, "uart3_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "uart3_clk", "uart3_mux_clk", 0,
+	clk = clk_register_gate(NULL, "uart3_clk", "uart3_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART3_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5ca00000.serial");
 
-	clk = clk_register_mux(NULL, "uart4_mux_clk", uart_parents,
+	clk = clk_register_mux(NULL, "uart4_mclk", uart_parents,
 			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_UART4_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,
 			0, &_lock);
-	clk_register_clkdev(clk, "uart4_mux_clk", NULL);
+	clk_register_clkdev(clk, "uart4_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "uart4_clk", "uart4_mux_clk", 0,
+	clk = clk_register_gate(NULL, "uart4_clk", "uart4_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART4_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5cb00000.serial");
 
-	clk = clk_register_mux(NULL, "uart5_mux_clk", uart_parents,
+	clk = clk_register_mux(NULL, "uart5_mclk", uart_parents,
 			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_UART5_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,
 			0, &_lock);
-	clk_register_clkdev(clk, "uart5_mux_clk", NULL);
+	clk_register_clkdev(clk, "uart5_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "uart5_clk", "uart5_mux_clk", 0,
+	clk = clk_register_gate(NULL, "uart5_clk", "uart5_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART5_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5cc00000.serial");
 
-	clk = clk_register_mux(NULL, "i2c1_mux_clk", i2c_parents,
+	clk = clk_register_mux(NULL, "i2c1_mclk", i2c_parents,
 			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_I2C1_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "i2c1_mux_clk", NULL);
+	clk_register_clkdev(clk, "i2c1_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "i2c1_clk", "i2c1_mux_clk", 0,
+	clk = clk_register_gate(NULL, "i2c1_clk", "i2c1_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C1_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5cd00000.i2c");
 
-	clk = clk_register_mux(NULL, "i2c2_mux_clk", i2c_parents,
+	clk = clk_register_mux(NULL, "i2c2_mclk", i2c_parents,
 			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_I2C2_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "i2c2_mux_clk", NULL);
+	clk_register_clkdev(clk, "i2c2_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "i2c2_clk", "i2c2_mux_clk", 0,
+	clk = clk_register_gate(NULL, "i2c2_clk", "i2c2_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C2_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5ce00000.i2c");
 
-	clk = clk_register_mux(NULL, "i2c3_mux_clk", i2c_parents,
+	clk = clk_register_mux(NULL, "i2c3_mclk", i2c_parents,
 			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_I2C3_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "i2c3_mux_clk", NULL);
+	clk_register_clkdev(clk, "i2c3_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "i2c3_clk", "i2c3_mux_clk", 0,
+	clk = clk_register_gate(NULL, "i2c3_clk", "i2c3_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C3_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5cf00000.i2c");
 
-	clk = clk_register_mux(NULL, "i2c4_mux_clk", i2c_parents,
+	clk = clk_register_mux(NULL, "i2c4_mclk", i2c_parents,
 			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_I2C4_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "i2c4_mux_clk", NULL);
+	clk_register_clkdev(clk, "i2c4_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "i2c4_clk", "i2c4_mux_clk", 0,
+	clk = clk_register_gate(NULL, "i2c4_clk", "i2c4_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C4_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5d000000.i2c");
 
-	clk = clk_register_mux(NULL, "i2c5_mux_clk", i2c_parents,
+	clk = clk_register_mux(NULL, "i2c5_mclk", i2c_parents,
 			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_I2C5_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "i2c5_mux_clk", NULL);
+	clk_register_clkdev(clk, "i2c5_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "i2c5_clk", "i2c5_mux_clk", 0,
+	clk = clk_register_gate(NULL, "i2c5_clk", "i2c5_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C5_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5d100000.i2c");
 
-	clk = clk_register_mux(NULL, "i2c6_mux_clk", i2c_parents,
+	clk = clk_register_mux(NULL, "i2c6_mclk", i2c_parents,
 			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_I2C6_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "i2c6_mux_clk", NULL);
+	clk_register_clkdev(clk, "i2c6_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "i2c6_clk", "i2c6_mux_clk", 0,
+	clk = clk_register_gate(NULL, "i2c6_clk", "i2c6_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C6_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5d200000.i2c");
 
-	clk = clk_register_mux(NULL, "i2c7_mux_clk", i2c_parents,
+	clk = clk_register_mux(NULL, "i2c7_mclk", i2c_parents,
 			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_I2C7_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "i2c7_mux_clk", NULL);
+	clk_register_clkdev(clk, "i2c7_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "i2c7_clk", "i2c7_mux_clk", 0,
+	clk = clk_register_gate(NULL, "i2c7_clk", "i2c7_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C7_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5d300000.i2c");
 
-	clk = clk_register_mux(NULL, "ssp1_mux_clk", ssp1_parents,
+	clk = clk_register_mux(NULL, "ssp1_mclk", ssp1_parents,
 			ARRAY_SIZE(ssp1_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_SSP1_CLK_SHIFT, SPEAR1310_SSP1_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "ssp1_mux_clk", NULL);
+	clk_register_clkdev(clk, "ssp1_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "ssp1_clk", "ssp1_mux_clk", 0,
+	clk = clk_register_gate(NULL, "ssp1_clk", "ssp1_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_SSP1_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "5d400000.spi");
 
-	clk = clk_register_mux(NULL, "pci_mux_clk", pci_parents,
+	clk = clk_register_mux(NULL, "pci_mclk", pci_parents,
 			ARRAY_SIZE(pci_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_PCI_CLK_SHIFT, SPEAR1310_PCI_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "pci_mux_clk", NULL);
+	clk_register_clkdev(clk, "pci_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "pci_clk", "pci_mux_clk", 0,
+	clk = clk_register_gate(NULL, "pci_clk", "pci_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_PCI_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "pci");
 
-	clk = clk_register_mux(NULL, "tdm1_mux_clk", tdm_parents,
+	clk = clk_register_mux(NULL, "tdm1_mclk", tdm_parents,
 			ARRAY_SIZE(tdm_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_TDM1_CLK_SHIFT, SPEAR1310_TDM_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "tdm1_mux_clk", NULL);
+	clk_register_clkdev(clk, "tdm1_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "tdm1_clk", "tdm1_mux_clk", 0,
+	clk = clk_register_gate(NULL, "tdm1_clk", "tdm1_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_TDM1_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "tdm_hdlc.0");
 
-	clk = clk_register_mux(NULL, "tdm2_mux_clk", tdm_parents,
+	clk = clk_register_mux(NULL, "tdm2_mclk", tdm_parents,
 			ARRAY_SIZE(tdm_parents), 0, SPEAR1310_RAS_CTRL_REG0,
 			SPEAR1310_TDM2_CLK_SHIFT, SPEAR1310_TDM_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "tdm2_mux_clk", NULL);
+	clk_register_clkdev(clk, "tdm2_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "tdm2_clk", "tdm2_mux_clk", 0,
+	clk = clk_register_gate(NULL, "tdm2_clk", "tdm2_mclk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_TDM2_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "tdm_hdlc.1");
-- 
cgit v1.2.3


From 5cfc545f50c4b6c0800e578b51019f2ecf490f1e Mon Sep 17 00:00:00 2001
From: Vipul Kumar Samar <vipulkumar.samar@st.com>
Date: Tue, 10 Jul 2012 17:12:45 +0530
Subject: Clk:spear3xx:Fix: Rename clk ids within predefined limit

The max limit of con_id is 16 and dev_id is 20. As of now for spear3xx, many clk
ids are exceeding this predefined limit.

This patch is intended to rename clk ids like:
    mux_clk -> _mclk
    gate_clk -> _gclk
    synth_clk -> syn_clk
    ras_gen1_synth_gate_clk -> ras_syn1_gclk
    ras_pll3_48m -> ras_pll3_
    pll3_48m -> pll3_

Signed-off-by: Vipul Kumar Samar <vipulkumar.samar@st.com>
Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-spear3xx/spear3xx.c  |   2 +-
 drivers/clk/spear/spear3xx_clock.c | 180 ++++++++++++++++++-------------------
 2 files changed, 87 insertions(+), 95 deletions(-)

diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c
index 0f41bd1c47c3..66db5f13af84 100644
--- a/arch/arm/mach-spear3xx/spear3xx.c
+++ b/arch/arm/mach-spear3xx/spear3xx.c
@@ -87,7 +87,7 @@ void __init spear3xx_map_io(void)
 
 static void __init spear3xx_timer_init(void)
 {
-	char pclk_name[] = "pll3_48m_clk";
+	char pclk_name[] = "pll3_clk";
 	struct clk *gpt_clk, *pclk;
 
 	spear3xx_clk_init();
diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c
index 01dd6daff2a1..c3157454bb3f 100644
--- a/drivers/clk/spear/spear3xx_clock.c
+++ b/drivers/clk/spear/spear3xx_clock.c
@@ -122,12 +122,12 @@ static struct gpt_rate_tbl gpt_rtbl[] = {
 };
 
 /* clock parents */
-static const char *uart0_parents[] = { "pll3_48m_clk", "uart_synth_gate_clk", };
-static const char *firda_parents[] = { "pll3_48m_clk", "firda_synth_gate_clk",
+static const char *uart0_parents[] = { "pll3_clk", "uart_syn_gclk", };
+static const char *firda_parents[] = { "pll3_clk", "firda_syn_gclk",
 };
-static const char *gpt0_parents[] = { "pll3_48m_clk", "gpt0_synth_clk", };
-static const char *gpt1_parents[] = { "pll3_48m_clk", "gpt1_synth_clk", };
-static const char *gpt2_parents[] = { "pll3_48m_clk", "gpt2_synth_clk", };
+static const char *gpt0_parents[] = { "pll3_clk", "gpt0_syn_clk", };
+static const char *gpt1_parents[] = { "pll3_clk", "gpt1_syn_clk", };
+static const char *gpt2_parents[] = { "pll3_clk", "gpt2_syn_clk", };
 static const char *gen2_3_parents[] = { "pll1_clk", "pll2_clk", };
 static const char *ddr_parents[] = { "ahb_clk", "ahbmult2_clk", "none",
 	"pll2_clk", };
@@ -137,7 +137,7 @@ static void __init spear300_clk_init(void)
 {
 	struct clk *clk;
 
-	clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_48m_clk", 0,
+	clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_clk", 0,
 			1, 1);
 	clk_register_clkdev(clk, NULL, "60000000.clcd");
 
@@ -219,15 +219,11 @@ static void __init spear310_clk_init(void)
 	#define SPEAR320_UARTX_PCLK_VAL_SYNTH1		0x0
 	#define SPEAR320_UARTX_PCLK_VAL_APB		0x1
 
-static const char *i2s_ref_parents[] = { "ras_pll2_clk",
-	"ras_gen2_synth_gate_clk", };
-static const char *sdhci_parents[] = { "ras_pll3_48m_clk",
-	"ras_gen3_synth_gate_clk",
-};
+static const char *i2s_ref_parents[] = { "ras_pll2_clk", "ras_syn2_gclk", };
+static const char *sdhci_parents[] = { "ras_pll3_clk", "ras_syn3_gclk", };
 static const char *smii0_parents[] = { "smii_125m_pad", "ras_pll2_clk",
-	"ras_gen0_synth_gate_clk", };
-static const char *uartx_parents[] = { "ras_gen1_synth_gate_clk", "ras_apb_clk",
-};
+	"ras_syn0_gclk", };
+static const char *uartx_parents[] = { "ras_syn1_gclk", "ras_apb_clk", };
 
 static void __init spear320_clk_init(void)
 {
@@ -237,7 +233,7 @@ static void __init spear320_clk_init(void)
 			CLK_IS_ROOT, 125000000);
 	clk_register_clkdev(clk, "smii_125m_pad", NULL);
 
-	clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_48m_clk", 0,
+	clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_clk", 0,
 			1, 1);
 	clk_register_clkdev(clk, NULL, "90000000.clcd");
 
@@ -363,9 +359,9 @@ void __init spear3xx_clk_init(void)
 	clk_register_clkdev(clk, NULL, "fc900000.rtc");
 
 	/* clock derived from 24 MHz osc clk */
-	clk = clk_register_fixed_rate(NULL, "pll3_48m_clk", "osc_24m_clk", 0,
+	clk = clk_register_fixed_rate(NULL, "pll3_clk", "osc_24m_clk", 0,
 			48000000);
-	clk_register_clkdev(clk, "pll3_48m_clk", NULL);
+	clk_register_clkdev(clk, "pll3_clk", NULL);
 
 	clk = clk_register_fixed_factor(NULL, "wdt_clk", "osc_24m_clk", 0, 1,
 			1);
@@ -392,98 +388,98 @@ void __init spear3xx_clk_init(void)
 			HCLK_RATIO_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "ahb_clk", NULL);
 
-	clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk",
-			"pll1_clk", 0, UART_CLK_SYNT, NULL, aux_rtbl,
-			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "uart_synth_clk", NULL);
-	clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL);
+	clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "pll1_clk", 0,
+			UART_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl),
+			&_lock, &clk1);
+	clk_register_clkdev(clk, "uart_syn_clk", NULL);
+	clk_register_clkdev(clk1, "uart_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents,
+	clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents,
 			ARRAY_SIZE(uart0_parents), 0, PERIP_CLK_CFG,
 			UART_CLK_SHIFT, UART_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "uart0_mux_clk", NULL);
+	clk_register_clkdev(clk, "uart0_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "uart0", "uart0_mux_clk", 0,
-			PERIP1_CLK_ENB, UART_CLK_ENB, 0, &_lock);
+	clk = clk_register_gate(NULL, "uart0", "uart0_mclk", 0, PERIP1_CLK_ENB,
+			UART_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "d0000000.serial");
 
-	clk = clk_register_aux("firda_synth_clk", "firda_synth_gate_clk",
-			"pll1_clk", 0, FIRDA_CLK_SYNT, NULL, aux_rtbl,
-			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "firda_synth_clk", NULL);
-	clk_register_clkdev(clk1, "firda_synth_gate_clk", NULL);
+	clk = clk_register_aux("firda_syn_clk", "firda_syn_gclk", "pll1_clk", 0,
+			FIRDA_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl),
+			&_lock, &clk1);
+	clk_register_clkdev(clk, "firda_syn_clk", NULL);
+	clk_register_clkdev(clk1, "firda_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "firda_mux_clk", firda_parents,
+	clk = clk_register_mux(NULL, "firda_mclk", firda_parents,
 			ARRAY_SIZE(firda_parents), 0, PERIP_CLK_CFG,
 			FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "firda_mux_clk", NULL);
+	clk_register_clkdev(clk, "firda_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "firda_clk", "firda_mux_clk", 0,
+	clk = clk_register_gate(NULL, "firda_clk", "firda_mclk", 0,
 			PERIP1_CLK_ENB, FIRDA_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "firda");
 
 	/* gpt clocks */
-	clk_register_gpt("gpt0_synth_clk", "pll1_clk", 0, PRSC0_CLK_CFG,
-			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock);
+	clk_register_gpt("gpt0_syn_clk", "pll1_clk", 0, PRSC0_CLK_CFG, gpt_rtbl,
+			ARRAY_SIZE(gpt_rtbl), &_lock);
 	clk = clk_register_mux(NULL, "gpt0_clk", gpt0_parents,
 			ARRAY_SIZE(gpt0_parents), 0, PERIP_CLK_CFG,
 			GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "gpt0");
 
-	clk_register_gpt("gpt1_synth_clk", "pll1_clk", 0, PRSC1_CLK_CFG,
-			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock);
-	clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt1_parents,
+	clk_register_gpt("gpt1_syn_clk", "pll1_clk", 0, PRSC1_CLK_CFG, gpt_rtbl,
+			ARRAY_SIZE(gpt_rtbl), &_lock);
+	clk = clk_register_mux(NULL, "gpt1_mclk", gpt1_parents,
 			ARRAY_SIZE(gpt1_parents), 0, PERIP_CLK_CFG,
 			GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gpt1_mux_clk", NULL);
-	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0,
+	clk_register_clkdev(clk, "gpt1_mclk", NULL);
+	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,
 			PERIP1_CLK_ENB, GPT1_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "gpt1");
 
-	clk_register_gpt("gpt2_synth_clk", "pll1_clk", 0, PRSC2_CLK_CFG,
-			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock);
-	clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt2_parents,
+	clk_register_gpt("gpt2_syn_clk", "pll1_clk", 0, PRSC2_CLK_CFG, gpt_rtbl,
+			ARRAY_SIZE(gpt_rtbl), &_lock);
+	clk = clk_register_mux(NULL, "gpt2_mclk", gpt2_parents,
 			ARRAY_SIZE(gpt2_parents), 0, PERIP_CLK_CFG,
 			GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gpt2_mux_clk", NULL);
-	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0,
+	clk_register_clkdev(clk, "gpt2_mclk", NULL);
+	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,
 			PERIP1_CLK_ENB, GPT2_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "gpt2");
 
 	/* general synths clocks */
-	clk = clk_register_aux("gen0_synth_clk", "gen0_synth_gate_clk",
-			"pll1_clk", 0, GEN0_CLK_SYNT, NULL, aux_rtbl,
-			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "gen0_synth_clk", NULL);
-	clk_register_clkdev(clk1, "gen0_synth_gate_clk", NULL);
-
-	clk = clk_register_aux("gen1_synth_clk", "gen1_synth_gate_clk",
-			"pll1_clk", 0, GEN1_CLK_SYNT, NULL, aux_rtbl,
-			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "gen1_synth_clk", NULL);
-	clk_register_clkdev(clk1, "gen1_synth_gate_clk", NULL);
-
-	clk = clk_register_mux(NULL, "gen2_3_parent_clk", gen2_3_parents,
+	clk = clk_register_aux("gen0_syn_clk", "gen0_syn_gclk", "pll1_clk",
+			0, GEN0_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl),
+			&_lock, &clk1);
+	clk_register_clkdev(clk, "gen0_syn_clk", NULL);
+	clk_register_clkdev(clk1, "gen0_syn_gclk", NULL);
+
+	clk = clk_register_aux("gen1_syn_clk", "gen1_syn_gclk", "pll1_clk",
+			0, GEN1_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl),
+			&_lock, &clk1);
+	clk_register_clkdev(clk, "gen1_syn_clk", NULL);
+	clk_register_clkdev(clk1, "gen1_syn_gclk", NULL);
+
+	clk = clk_register_mux(NULL, "gen2_3_par_clk", gen2_3_parents,
 			ARRAY_SIZE(gen2_3_parents), 0, CORE_CLK_CFG,
 			GEN_SYNTH2_3_CLK_SHIFT, GEN_SYNTH2_3_CLK_MASK, 0,
 			&_lock);
-	clk_register_clkdev(clk, "gen2_3_parent_clk", NULL);
+	clk_register_clkdev(clk, "gen2_3_par_clk", NULL);
 
-	clk = clk_register_aux("gen2_synth_clk", "gen2_synth_gate_clk",
-			"gen2_3_parent_clk", 0, GEN2_CLK_SYNT, NULL, aux_rtbl,
+	clk = clk_register_aux("gen2_syn_clk", "gen2_syn_gclk",
+			"gen2_3_par_clk", 0, GEN2_CLK_SYNT, NULL, aux_rtbl,
 			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "gen2_synth_clk", NULL);
-	clk_register_clkdev(clk1, "gen2_synth_gate_clk", NULL);
+	clk_register_clkdev(clk, "gen2_syn_clk", NULL);
+	clk_register_clkdev(clk1, "gen2_syn_gclk", NULL);
 
-	clk = clk_register_aux("gen3_synth_clk", "gen3_synth_gate_clk",
-			"gen2_3_parent_clk", 0, GEN3_CLK_SYNT, NULL, aux_rtbl,
+	clk = clk_register_aux("gen3_syn_clk", "gen3_syn_gclk",
+			"gen2_3_par_clk", 0, GEN3_CLK_SYNT, NULL, aux_rtbl,
 			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "gen3_synth_clk", NULL);
-	clk_register_clkdev(clk1, "gen3_synth_gate_clk", NULL);
+	clk_register_clkdev(clk, "gen3_syn_clk", NULL);
+	clk_register_clkdev(clk1, "gen3_syn_gclk", NULL);
 
 	/* clock derived from pll3 clk */
-	clk = clk_register_gate(NULL, "usbh_clk", "pll3_48m_clk", 0,
-			PERIP1_CLK_ENB, USBH_CLK_ENB, 0, &_lock);
+	clk = clk_register_gate(NULL, "usbh_clk", "pll3_clk", 0, PERIP1_CLK_ENB,
+			USBH_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, "usbh_clk", NULL);
 
 	clk = clk_register_fixed_factor(NULL, "usbh.0_clk", "usbh_clk", 0, 1,
@@ -494,8 +490,8 @@ void __init spear3xx_clk_init(void)
 			1);
 	clk_register_clkdev(clk, "usbh.1_clk", NULL);
 
-	clk = clk_register_gate(NULL, "usbd_clk", "pll3_48m_clk", 0,
-			PERIP1_CLK_ENB, USBD_CLK_ENB, 0, &_lock);
+	clk = clk_register_gate(NULL, "usbd_clk", "pll3_clk", 0, PERIP1_CLK_ENB,
+			USBD_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "designware_udc");
 
 	/* clock derived from ahb clk */
@@ -579,29 +575,25 @@ void __init spear3xx_clk_init(void)
 			RAS_CLK_ENB, RAS_PLL2_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, "ras_pll2_clk", NULL);
 
-	clk = clk_register_gate(NULL, "ras_pll3_48m_clk", "pll3_48m_clk", 0,
+	clk = clk_register_gate(NULL, "ras_pll3_clk", "pll3_clk", 0,
 			RAS_CLK_ENB, RAS_48M_CLK_ENB, 0, &_lock);
-	clk_register_clkdev(clk, "ras_pll3_48m_clk", NULL);
-
-	clk = clk_register_gate(NULL, "ras_gen0_synth_gate_clk",
-			"gen0_synth_gate_clk", 0, RAS_CLK_ENB,
-			RAS_SYNT0_CLK_ENB, 0, &_lock);
-	clk_register_clkdev(clk, "ras_gen0_synth_gate_clk", NULL);
-
-	clk = clk_register_gate(NULL, "ras_gen1_synth_gate_clk",
-			"gen1_synth_gate_clk", 0, RAS_CLK_ENB,
-			RAS_SYNT1_CLK_ENB, 0, &_lock);
-	clk_register_clkdev(clk, "ras_gen1_synth_gate_clk", NULL);
-
-	clk = clk_register_gate(NULL, "ras_gen2_synth_gate_clk",
-			"gen2_synth_gate_clk", 0, RAS_CLK_ENB,
-			RAS_SYNT2_CLK_ENB, 0, &_lock);
-	clk_register_clkdev(clk, "ras_gen2_synth_gate_clk", NULL);
-
-	clk = clk_register_gate(NULL, "ras_gen3_synth_gate_clk",
-			"gen3_synth_gate_clk", 0, RAS_CLK_ENB,
-			RAS_SYNT3_CLK_ENB, 0, &_lock);
-	clk_register_clkdev(clk, "ras_gen3_synth_gate_clk", NULL);
+	clk_register_clkdev(clk, "ras_pll3_clk", NULL);
+
+	clk = clk_register_gate(NULL, "ras_syn0_gclk", "gen0_syn_gclk", 0,
+			RAS_CLK_ENB, RAS_SYNT0_CLK_ENB, 0, &_lock);
+	clk_register_clkdev(clk, "ras_syn0_gclk", NULL);
+
+	clk = clk_register_gate(NULL, "ras_syn1_gclk", "gen1_syn_gclk", 0,
+			RAS_CLK_ENB, RAS_SYNT1_CLK_ENB, 0, &_lock);
+	clk_register_clkdev(clk, "ras_syn1_gclk", NULL);
+
+	clk = clk_register_gate(NULL, "ras_syn2_gclk", "gen2_syn_gclk", 0,
+			RAS_CLK_ENB, RAS_SYNT2_CLK_ENB, 0, &_lock);
+	clk_register_clkdev(clk, "ras_syn2_gclk", NULL);
+
+	clk = clk_register_gate(NULL, "ras_syn3_gclk", "gen3_syn_gclk", 0,
+			RAS_CLK_ENB, RAS_SYNT3_CLK_ENB, 0, &_lock);
+	clk_register_clkdev(clk, "ras_syn3_gclk", NULL);
 
 	if (of_machine_is_compatible("st,spear300"))
 		spear300_clk_init();
-- 
cgit v1.2.3


From a8f4bf0eb4ca7a0a578079fb5807e59b7111a1e2 Mon Sep 17 00:00:00 2001
From: Vipul Kumar Samar <vipulkumar.samar@st.com>
Date: Tue, 10 Jul 2012 17:12:46 +0530
Subject: Clk:spear6xx:Fix: Rename clk ids within predefined limit

The max limit of con_id is 16 and dev_id is 20. As of now for spear6xx, many clk
ids are exceeding this predefined limit.

This patch is intended to rename clk ids like:
    mux_clk -> _mclk
    gate_clk -> _gclk
    synth_clk -> syn_clk
    ras_gen1_synth_gate_clk -> ras_syn1_gclk
    pll3_48m -> pll3_

Signed-off-by: Vipul Kumar Samar <vipulkumar.samar@st.com>
Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-spear6xx/spear6xx.c  |   2 +-
 drivers/clk/spear/spear6xx_clock.c | 122 ++++++++++++++++++-------------------
 2 files changed, 61 insertions(+), 63 deletions(-)

diff --git a/arch/arm/mach-spear6xx/spear6xx.c b/arch/arm/mach-spear6xx/spear6xx.c
index 2e2e3596583e..9af67d003c62 100644
--- a/arch/arm/mach-spear6xx/spear6xx.c
+++ b/arch/arm/mach-spear6xx/spear6xx.c
@@ -423,7 +423,7 @@ void __init spear6xx_map_io(void)
 
 static void __init spear6xx_timer_init(void)
 {
-	char pclk_name[] = "pll3_48m_clk";
+	char pclk_name[] = "pll3_clk";
 	struct clk *gpt_clk, *pclk;
 
 	spear6xx_clk_init();
diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c
index 61026ae564ab..a98d0866f541 100644
--- a/drivers/clk/spear/spear6xx_clock.c
+++ b/drivers/clk/spear/spear6xx_clock.c
@@ -97,13 +97,12 @@ static struct aux_rate_tbl aux_rtbl[] = {
 	{.xscale = 1, .yscale = 2, .eq = 1}, /* 166 MHz */
 };
 
-static const char *clcd_parents[] = { "pll3_48m_clk", "clcd_synth_gate_clk", };
-static const char *firda_parents[] = { "pll3_48m_clk", "firda_synth_gate_clk",
-};
-static const char *uart_parents[] = { "pll3_48m_clk", "uart_synth_gate_clk", };
-static const char *gpt0_1_parents[] = { "pll3_48m_clk", "gpt0_1_synth_clk", };
-static const char *gpt2_parents[] = { "pll3_48m_clk", "gpt2_synth_clk", };
-static const char *gpt3_parents[] = { "pll3_48m_clk", "gpt3_synth_clk", };
+static const char *clcd_parents[] = { "pll3_clk", "clcd_syn_gclk", };
+static const char *firda_parents[] = { "pll3_clk", "firda_syn_gclk", };
+static const char *uart_parents[] = { "pll3_clk", "uart_syn_gclk", };
+static const char *gpt0_1_parents[] = { "pll3_clk", "gpt0_1_syn_clk", };
+static const char *gpt2_parents[] = { "pll3_clk", "gpt2_syn_clk", };
+static const char *gpt3_parents[] = { "pll3_clk", "gpt3_syn_clk", };
 static const char *ddr_parents[] = { "ahb_clk", "ahbmult2_clk", "none",
 	"pll2_clk", };
 
@@ -136,9 +135,9 @@ void __init spear6xx_clk_init(void)
 	clk_register_clkdev(clk, NULL, "rtc-spear");
 
 	/* clock derived from 30 MHz osc clk */
-	clk = clk_register_fixed_rate(NULL, "pll3_48m_clk", "osc_24m_clk", 0,
+	clk = clk_register_fixed_rate(NULL, "pll3_clk", "osc_24m_clk", 0,
 			48000000);
-	clk_register_clkdev(clk, "pll3_48m_clk", NULL);
+	clk_register_clkdev(clk, "pll3_clk", NULL);
 
 	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "osc_30m_clk",
 			0, PLL1_CTR, PLL1_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl),
@@ -146,9 +145,9 @@ void __init spear6xx_clk_init(void)
 	clk_register_clkdev(clk, "vco1_clk", NULL);
 	clk_register_clkdev(clk1, "pll1_clk", NULL);
 
-	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL,
-			"osc_30m_clk", 0, PLL2_CTR, PLL2_FRQ, pll_rtbl,
-			ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL);
+	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "osc_30m_clk",
+			0, PLL2_CTR, PLL2_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl),
+			&_lock, &clk1, NULL);
 	clk_register_clkdev(clk, "vco2_clk", NULL);
 	clk_register_clkdev(clk1, "pll2_clk", NULL);
 
@@ -165,111 +164,111 @@ void __init spear6xx_clk_init(void)
 			HCLK_RATIO_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "ahb_clk", NULL);
 
-	clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk",
-			"pll1_clk", 0, UART_CLK_SYNT, NULL, aux_rtbl,
-			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "uart_synth_clk", NULL);
-	clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL);
+	clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "pll1_clk", 0,
+			UART_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl),
+			&_lock, &clk1);
+	clk_register_clkdev(clk, "uart_syn_clk", NULL);
+	clk_register_clkdev(clk1, "uart_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "uart_mux_clk", uart_parents,
+	clk = clk_register_mux(NULL, "uart_mclk", uart_parents,
 			ARRAY_SIZE(uart_parents), 0, PERIP_CLK_CFG,
 			UART_CLK_SHIFT, UART_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "uart_mux_clk", NULL);
+	clk_register_clkdev(clk, "uart_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "uart0", "uart_mux_clk", 0,
-			PERIP1_CLK_ENB, UART0_CLK_ENB, 0, &_lock);
+	clk = clk_register_gate(NULL, "uart0", "uart_mclk", 0, PERIP1_CLK_ENB,
+			UART0_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "d0000000.serial");
 
-	clk = clk_register_gate(NULL, "uart1", "uart_mux_clk", 0,
-			PERIP1_CLK_ENB, UART1_CLK_ENB, 0, &_lock);
+	clk = clk_register_gate(NULL, "uart1", "uart_mclk", 0, PERIP1_CLK_ENB,
+			UART1_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "d0080000.serial");
 
-	clk = clk_register_aux("firda_synth_clk", "firda_synth_gate_clk",
-			"pll1_clk", 0, FIRDA_CLK_SYNT, NULL, aux_rtbl,
-			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "firda_synth_clk", NULL);
-	clk_register_clkdev(clk1, "firda_synth_gate_clk", NULL);
+	clk = clk_register_aux("firda_syn_clk", "firda_syn_gclk", "pll1_clk",
+			0, FIRDA_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl),
+			&_lock, &clk1);
+	clk_register_clkdev(clk, "firda_syn_clk", NULL);
+	clk_register_clkdev(clk1, "firda_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "firda_mux_clk", firda_parents,
+	clk = clk_register_mux(NULL, "firda_mclk", firda_parents,
 			ARRAY_SIZE(firda_parents), 0, PERIP_CLK_CFG,
 			FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "firda_mux_clk", NULL);
+	clk_register_clkdev(clk, "firda_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "firda_clk", "firda_mux_clk", 0,
+	clk = clk_register_gate(NULL, "firda_clk", "firda_mclk", 0,
 			PERIP1_CLK_ENB, FIRDA_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "firda");
 
-	clk = clk_register_aux("clcd_synth_clk", "clcd_synth_gate_clk",
-			"pll1_clk", 0, CLCD_CLK_SYNT, NULL, aux_rtbl,
-			ARRAY_SIZE(aux_rtbl), &_lock, &clk1);
-	clk_register_clkdev(clk, "clcd_synth_clk", NULL);
-	clk_register_clkdev(clk1, "clcd_synth_gate_clk", NULL);
+	clk = clk_register_aux("clcd_syn_clk", "clcd_syn_gclk", "pll1_clk",
+			0, CLCD_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl),
+			&_lock, &clk1);
+	clk_register_clkdev(clk, "clcd_syn_clk", NULL);
+	clk_register_clkdev(clk1, "clcd_syn_gclk", NULL);
 
-	clk = clk_register_mux(NULL, "clcd_mux_clk", clcd_parents,
+	clk = clk_register_mux(NULL, "clcd_mclk", clcd_parents,
 			ARRAY_SIZE(clcd_parents), 0, PERIP_CLK_CFG,
 			CLCD_CLK_SHIFT, CLCD_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "clcd_mux_clk", NULL);
+	clk_register_clkdev(clk, "clcd_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "clcd_clk", "clcd_mux_clk", 0,
+	clk = clk_register_gate(NULL, "clcd_clk", "clcd_mclk", 0,
 			PERIP1_CLK_ENB, CLCD_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "clcd");
 
 	/* gpt clocks */
-	clk = clk_register_gpt("gpt0_1_synth_clk", "pll1_clk", 0, PRSC0_CLK_CFG,
+	clk = clk_register_gpt("gpt0_1_syn_clk", "pll1_clk", 0, PRSC0_CLK_CFG,
 			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock);
-	clk_register_clkdev(clk, "gpt0_1_synth_clk", NULL);
+	clk_register_clkdev(clk, "gpt0_1_syn_clk", NULL);
 
-	clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt0_1_parents,
+	clk = clk_register_mux(NULL, "gpt0_mclk", gpt0_1_parents,
 			ARRAY_SIZE(gpt0_1_parents), 0, PERIP_CLK_CFG,
 			GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "gpt0");
 
-	clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt0_1_parents,
+	clk = clk_register_mux(NULL, "gpt1_mclk", gpt0_1_parents,
 			ARRAY_SIZE(gpt0_1_parents), 0, PERIP_CLK_CFG,
 			GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gpt1_mux_clk", NULL);
+	clk_register_clkdev(clk, "gpt1_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0,
+	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,
 			PERIP1_CLK_ENB, GPT1_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "gpt1");
 
-	clk = clk_register_gpt("gpt2_synth_clk", "pll1_clk", 0, PRSC1_CLK_CFG,
+	clk = clk_register_gpt("gpt2_syn_clk", "pll1_clk", 0, PRSC1_CLK_CFG,
 			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock);
-	clk_register_clkdev(clk, "gpt2_synth_clk", NULL);
+	clk_register_clkdev(clk, "gpt2_syn_clk", NULL);
 
-	clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt2_parents,
+	clk = clk_register_mux(NULL, "gpt2_mclk", gpt2_parents,
 			ARRAY_SIZE(gpt2_parents), 0, PERIP_CLK_CFG,
 			GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gpt2_mux_clk", NULL);
+	clk_register_clkdev(clk, "gpt2_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0,
+	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,
 			PERIP1_CLK_ENB, GPT2_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "gpt2");
 
-	clk = clk_register_gpt("gpt3_synth_clk", "pll1_clk", 0, PRSC2_CLK_CFG,
+	clk = clk_register_gpt("gpt3_syn_clk", "pll1_clk", 0, PRSC2_CLK_CFG,
 			gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock);
-	clk_register_clkdev(clk, "gpt3_synth_clk", NULL);
+	clk_register_clkdev(clk, "gpt3_syn_clk", NULL);
 
-	clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt3_parents,
+	clk = clk_register_mux(NULL, "gpt3_mclk", gpt3_parents,
 			ARRAY_SIZE(gpt3_parents), 0, PERIP_CLK_CFG,
 			GPT3_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
-	clk_register_clkdev(clk, "gpt3_mux_clk", NULL);
+	clk_register_clkdev(clk, "gpt3_mclk", NULL);
 
-	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0,
+	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0,
 			PERIP1_CLK_ENB, GPT3_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "gpt3");
 
 	/* clock derived from pll3 clk */
-	clk = clk_register_gate(NULL, "usbh0_clk", "pll3_48m_clk", 0,
+	clk = clk_register_gate(NULL, "usbh0_clk", "pll3_clk", 0,
 			PERIP1_CLK_ENB, USBH0_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "usbh.0_clk");
 
-	clk = clk_register_gate(NULL, "usbh1_clk", "pll3_48m_clk", 0,
+	clk = clk_register_gate(NULL, "usbh1_clk", "pll3_clk", 0,
 			PERIP1_CLK_ENB, USBH1_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "usbh.1_clk");
 
-	clk = clk_register_gate(NULL, "usbd_clk", "pll3_48m_clk", 0,
-			PERIP1_CLK_ENB, USBD_CLK_ENB, 0, &_lock);
+	clk = clk_register_gate(NULL, "usbd_clk", "pll3_clk", 0, PERIP1_CLK_ENB,
+			USBD_CLK_ENB, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "designware_udc");
 
 	/* clock derived from ahb clk */
@@ -278,9 +277,8 @@ void __init spear6xx_clk_init(void)
 	clk_register_clkdev(clk, "ahbmult2_clk", NULL);
 
 	clk = clk_register_mux(NULL, "ddr_clk", ddr_parents,
-			ARRAY_SIZE(ddr_parents),
-			0, PLL_CLK_CFG, MCTR_CLK_SHIFT, MCTR_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(ddr_parents), 0, PLL_CLK_CFG, MCTR_CLK_SHIFT,
+			MCTR_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "ddr_clk", NULL);
 
 	clk = clk_register_divider(NULL, "apb_clk", "ahb_clk",
-- 
cgit v1.2.3


From 465e4f2b19159533e08042f8a113769512385195 Mon Sep 17 00:00:00 2001
From: Vipul Kumar Samar <vipulkumar.samar@st.com>
Date: Wed, 4 Jul 2012 18:52:17 +0800
Subject: ARM: SPEAr13xx: Fix Interrupt bindings

   - Correct interrupt bindings for uart, ethernet and pmu.
   - Added interrupt binding for keyboard.

Signed-off-by: Vipul Kumar Samar <vipulkumar.samar@st.com>
Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 arch/arm/boot/dts/spear13xx.dtsi | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index 10dcec7e7321..f7b84aced654 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -43,8 +43,8 @@
 
 	pmu {
 		compatible = "arm,cortex-a9-pmu";
-		interrupts = <0 8 0x04
-			      0 9 0x04>;
+		interrupts = <0 6 0x04
+			      0 7 0x04>;
 	};
 
 	L2: l2-cache {
@@ -119,8 +119,8 @@
 		gmac0: eth@e2000000 {
 			compatible = "st,spear600-gmac";
 			reg = <0xe2000000 0x8000>;
-			interrupts = <0 23 0x4
-				      0 24 0x4>;
+			interrupts = <0 33 0x4
+				      0 34 0x4>;
 			interrupt-names = "macirq", "eth_wake_irq";
 			status = "disabled";
 		};
@@ -202,6 +202,7 @@
 			kbd@e0300000 {
 				compatible = "st,spear300-kbd";
 				reg = <0xe0300000 0x1000>;
+				interrupts = <0 52 0x4>;
 				status = "disabled";
 			};
 
@@ -224,7 +225,7 @@
 			serial@e0000000 {
 				compatible = "arm,pl011", "arm,primecell";
 				reg = <0xe0000000 0x1000>;
-				interrupts = <0 36 0x4>;
+				interrupts = <0 35 0x4>;
 				status = "disabled";
 			};
 
-- 
cgit v1.2.3


From d9ba8db2157654e2fc159a63c4b6eb8cffb352ae Mon Sep 17 00:00:00 2001
From: Vipul Kumar Samar <vipulkumar.samar@st.com>
Date: Wed, 4 Jul 2012 18:52:19 +0800
Subject: clk: SPEAr1340: Fix clk enable register for uart1 and i2c1.

This patch is to fix typing mistake of clk enable register of i2c1 and
uart1.

Signed-off-by: Vipul Kumar Samar <vipulkumar.samar@st.com>
Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/clk/spear/spear1340_clock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c
index 0f2324b9321b..44846987ee64 100644
--- a/drivers/clk/spear/spear1340_clock.c
+++ b/drivers/clk/spear/spear1340_clock.c
@@ -617,7 +617,7 @@ void __init spear1340_clk_init(void)
 	clk_register_clkdev(clk, "uart1_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0,
-			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_UART1_CLK_ENB, 0,
+			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_UART1_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "b4100000.serial");
 
@@ -741,7 +741,7 @@ void __init spear1340_clk_init(void)
 	clk_register_clkdev(clk, NULL, "e0280000.i2c");
 
 	clk = clk_register_gate(NULL, "i2c1_clk", "ahb_clk", 0,
-			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_I2C1_CLK_ENB, 0,
+			SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_I2C1_CLK_ENB, 0,
 			&_lock);
 	clk_register_clkdev(clk, NULL, "b4000000.i2c");
 
-- 
cgit v1.2.3


From d4f513ff12c1d74b379715e78c01002f5d055315 Mon Sep 17 00:00:00 2001
From: Vipul Kumar Samar <vipulkumar.samar@st.com>
Date: Fri, 6 Jul 2012 15:52:36 +0530
Subject: Clk: SPEAr1340: Update sys clock parent array

sys_clk has multiple parents and selection of parent depends on sys_clk_ctrl
register bit no. 23:25, with following possibilities

   0XX: pll1_clk
   10X: sys_synth_clk
   110: pll2_clk
   111: pll3_clk

Out of several possibilities (h/w wise) to select same clock parent for
sys_clk, current clock implementation was considering just one value.

When bootloader programmed different (valid) value to select a clock
parent then Linux breaks.

Here, we try to include all possibilities which can lead to same
clock selection thus making Linux independent of bootloader selection
values.

Signed-off-by: Vipul Kumar Samar <vipulkumar.samar@st.com>
Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/clk/spear/spear1340_clock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c
index 44846987ee64..2352cee7f645 100644
--- a/drivers/clk/spear/spear1340_clock.c
+++ b/drivers/clk/spear/spear1340_clock.c
@@ -369,8 +369,8 @@ static struct frac_rate_tbl gen_rtbl[] = {
 
 /* clock parents */
 static const char *vco_parents[] = { "osc_24m_clk", "osc_25m_clk", };
-static const char *sys_parents[] = { "none", "pll1_clk", "none", "none",
-	"sys_syn_clk", "none", "pll2_clk", "pll3_clk", };
+static const char *sys_parents[] = { "pll1_clk", "pll1_clk", "pll1_clk",
+	"pll1_clk", "sys_synth_clk", "sys_synth_clk", "pll2_clk", "pll3_clk", };
 static const char *ahb_parents[] = { "cpu_div3_clk", "amba_syn_clk", };
 static const char *gpt_parents[] = { "osc_24m_clk", "apb_clk", };
 static const char *uart0_parents[] = { "pll5_clk", "osc_24m_clk",
-- 
cgit v1.2.3


From 45a5e119ad781afca186fd5dccdb70c3c70057a7 Mon Sep 17 00:00:00 2001
From: Vipul Kumar Samar <vipulkumar.samar@st.com>
Date: Fri, 13 Jul 2012 17:20:46 +0530
Subject: ARM: dts: SPEAr320: Fix compatible string

Signed-off-by: Vipul Kumar Samar <vipulkumar.samar@st.com>
Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 arch/arm/boot/dts/spear320-evb.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts
index c13fd1f3b09f..f28bea8a3ce2 100644
--- a/arch/arm/boot/dts/spear320-evb.dts
+++ b/arch/arm/boot/dts/spear320-evb.dts
@@ -15,8 +15,8 @@
 /include/ "spear320.dtsi"
 
 / {
-	model = "ST SPEAr300 Evaluation Board";
-	compatible = "st,spear300-evb", "st,spear300";
+	model = "ST SPEAr320 Evaluation Board";
+	compatible = "st,spear320-evb", "st,spear320";
 	#address-cells = <1>;
 	#size-cells = <1>;
 
-- 
cgit v1.2.3


From 69da52f7eabbb9808ccaf7098ae676429f924e7d Mon Sep 17 00:00:00 2001
From: Vipul Kumar Samar <vipulkumar.samar@st.com>
Date: Fri, 13 Jul 2012 17:22:11 +0530
Subject: ARM: dts: SPEAr320: Boot the board in EXTENDED_MODE

On spear320 device supported mode are:

   * AUTO_NET_SMII_MODE
   * AUTO_NET_MII_MODE
   * AUTO_EXP_MODE
   * SMALL_PRINTERS_MODE
   * EXTENDED_MODE

spear320-evb board is designed for EXTENDED_MODE only, hence it does not
boot correctly in current form where pinctrl part for some devices fail.

Configure and boot the SPEAr320 evaluation board in EXTENDED_MODE.

Signed-off-by: Vipul Kumar Samar <vipulkumar.samar@st.com>
Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 arch/arm/boot/dts/spear320-evb.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts
index f28bea8a3ce2..e4e912f95024 100644
--- a/arch/arm/boot/dts/spear320-evb.dts
+++ b/arch/arm/boot/dts/spear320-evb.dts
@@ -26,7 +26,7 @@
 
 	ahb {
 		pinmux@b3000000 {
-			st,pinmux-mode = <3>;
+			st,pinmux-mode = <4>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&state_default>;
 
-- 
cgit v1.2.3


From 69c7e3772eaee5d2097725cdb79bc3ef867c0d9e Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Fri, 11 May 2012 10:41:01 +0200
Subject: ARM: SPEAr600: Fix timer interrupt definition in spear600.dtsi

Signed-off-by: Stefan Roese <sr@denx.de>
Signed-off-by: Shiraz Hashim <shiraz.hashim@st.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 arch/arm/boot/dts/spear600.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi
index 089f0a42c50e..a3c36e47d7ef 100644
--- a/arch/arm/boot/dts/spear600.dtsi
+++ b/arch/arm/boot/dts/spear600.dtsi
@@ -181,6 +181,7 @@
 			timer@f0000000 {
 				compatible = "st,spear-timer";
 				reg = <0xf0000000 0x400>;
+				interrupt-parent = <&vic0>;
 				interrupts = <16>;
 			};
 		};
-- 
cgit v1.2.3


From 331ae4962b975246944ea039697a8f1cadce42bb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 18 Jul 2012 09:31:36 +0100
Subject: ext4: fix duplicated mnt_drop_write call in EXT4_IOC_MOVE_EXT

Caused, AFAICS, by mismerge in commit ff9cb1c4eead ("Merge branch
'for_linus' into for_linus_merged")

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org  # 3.3+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/ioctl.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e34deac3f366..6ec6f9ee2fec 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -268,7 +268,6 @@ group_extend_out:
 		err = ext4_move_extents(filp, donor_filp, me.orig_start,
 					me.donor_start, me.len, &me.moved_len);
 		mnt_drop_write_file(filp);
-		mnt_drop_write(filp->f_path.mnt);
 
 		if (copy_to_user((struct move_extent __user *)arg,
 				 &me, sizeof(me)))
-- 
cgit v1.2.3


From 89d7ae34cdda4195809a5a987f697a517a2a3177 Mon Sep 17 00:00:00 2001
From: Paul Moore <pmoore@redhat.com>
Date: Tue, 17 Jul 2012 11:07:47 +0000
Subject: cipso: don't follow a NULL pointer when setsockopt() is called

As reported by Alan Cox, and verified by Lin Ming, when a user
attempts to add a CIPSO option to a socket using the CIPSO_V4_TAG_LOCAL
tag the kernel dies a terrible death when it attempts to follow a NULL
pointer (the skb argument to cipso_v4_validate() is NULL when called via
the setsockopt() syscall).

This patch fixes this by first checking to ensure that the skb is
non-NULL before using it to find the incoming network interface.  In
the unlikely case where the skb is NULL and the user attempts to add
a CIPSO option with the _TAG_LOCAL tag we return an error as this is
not something we want to allow.

A simple reproducer, kindly supplied by Lin Ming, although you must
have the CIPSO DOI #3 configure on the system first or you will be
caught early in cipso_v4_validate():

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <linux/ip.h>
	#include <linux/in.h>
	#include <string.h>

	struct local_tag {
		char type;
		char length;
		char info[4];
	};

	struct cipso {
		char type;
		char length;
		char doi[4];
		struct local_tag local;
	};

	int main(int argc, char **argv)
	{
		int sockfd;
		struct cipso cipso = {
			.type = IPOPT_CIPSO,
			.length = sizeof(struct cipso),
			.local = {
				.type = 128,
				.length = sizeof(struct local_tag),
			},
		};

		memset(cipso.doi, 0, 4);
		cipso.doi[3] = 3;

		sockfd = socket(AF_INET, SOCK_DGRAM, 0);
		#define SOL_IP 0
		setsockopt(sockfd, SOL_IP, IP_OPTIONS,
			&cipso, sizeof(struct cipso));

		return 0;
	}

CC: Lin Ming <mlin@ss.pku.edu.cn>
Reported-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Paul Moore <pmoore@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/cipso_ipv4.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index c48adc565e92..667c1d4ca984 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1725,8 +1725,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
 		case CIPSO_V4_TAG_LOCAL:
 			/* This is a non-standard tag that we only allow for
 			 * local connections, so if the incoming interface is
-			 * not the loopback device drop the packet. */
-			if (!(skb->dev->flags & IFF_LOOPBACK)) {
+			 * not the loopback device drop the packet. Further,
+			 * there is no legitimate reason for setting this from
+			 * userspace so reject it if skb is NULL. */
+			if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) {
 				err_offset = opt_iter;
 				goto validate_return_locked;
 			}
-- 
cgit v1.2.3


From 2ab1c24bbd1bae22e0a54beba0cbb12272d940e4 Mon Sep 17 00:00:00 2001
From: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Date: Tue, 17 Jul 2012 09:22:09 +0000
Subject: MAINTAINERS: Changes in qlcnic and qlge maintainers list

Please apply.

Thanks.

Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c82c343168e8..fe643e7b9df6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5565,7 +5565,7 @@ F:	Documentation/networking/LICENSE.qla3xxx
 F:	drivers/net/ethernet/qlogic/qla3xxx.*
 
 QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
-M:	Anirban Chakraborty <anirban.chakraborty@qlogic.com>
+M:	Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
 M:	Sony Chacko <sony.chacko@qlogic.com>
 M:	linux-driver@qlogic.com
 L:	netdev@vger.kernel.org
@@ -5573,7 +5573,6 @@ S:	Supported
 F:	drivers/net/ethernet/qlogic/qlcnic/
 
 QLOGIC QLGE 10Gb ETHERNET DRIVER
-M:	Anirban Chakraborty <anirban.chakraborty@qlogic.com>
 M:	Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
 M:	Ron Mercer <ron.mercer@qlogic.com>
 M:	linux-driver@qlogic.com
-- 
cgit v1.2.3


From c1e3209623ccd92f2f391a31ecf3895daa0238f3 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 11 Jul 2012 14:12:45 +0200
Subject: v4l2-dev: forgot to add VIDIOC_DV_TIMINGS_CAP.

The VIDIOC_DV_TIMINGS_CAP ioctl check wasn't added to determine_valid_ioctls().
This caused this ioctl to always return -ENOTTY.

The cause for this was that for 3.5 two patch series were merged, one
changing V4L2 core ioctl handling and one adding new functionality, and
some of the new functionality wasn't handled by the new V4L2 core code.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
[ Taking it directly due to vacations  - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/media/video/v4l2-dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c
index 83dbb2ddff10..0cbada18f6f5 100644
--- a/drivers/media/video/v4l2-dev.c
+++ b/drivers/media/video/v4l2-dev.c
@@ -681,6 +681,7 @@ static void determine_valid_ioctls(struct video_device *vdev)
 	SET_VALID_IOCTL(ops, VIDIOC_G_DV_TIMINGS, vidioc_g_dv_timings);
 	SET_VALID_IOCTL(ops, VIDIOC_ENUM_DV_TIMINGS, vidioc_enum_dv_timings);
 	SET_VALID_IOCTL(ops, VIDIOC_QUERY_DV_TIMINGS, vidioc_query_dv_timings);
+	SET_VALID_IOCTL(ops, VIDIOC_DV_TIMINGS_CAP, vidioc_dv_timings_cap);
 	/* yes, really vidioc_subscribe_event */
 	SET_VALID_IOCTL(ops, VIDIOC_DQEVENT, vidioc_subscribe_event);
 	SET_VALID_IOCTL(ops, VIDIOC_SUBSCRIBE_EVENT, vidioc_subscribe_event);
-- 
cgit v1.2.3


From 734b65417b24d6eea3e3d7457e1f11493890ee1d Mon Sep 17 00:00:00 2001
From: "Rustad, Mark D" <mark.d.rustad@intel.com>
Date: Wed, 18 Jul 2012 09:06:07 +0000
Subject: net: Statically initialize init_net.dev_base_head

This change eliminates an initialization-order hazard most
recently seen when netprio_cgroup is built into the kernel.

With thanks to Eric Dumazet for catching a bug.

Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c           | 3 ++-
 net/core/net_namespace.c | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 0f28a9e0b8ad..1cb0d8a6aa6c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6283,7 +6283,8 @@ static struct hlist_head *netdev_create_hash(void)
 /* Initialize per network namespace state */
 static int __net_init netdev_init(struct net *net)
 {
-	INIT_LIST_HEAD(&net->dev_base_head);
+	if (net != &init_net)
+		INIT_LIST_HEAD(&net->dev_base_head);
 
 	net->dev_name_head = netdev_create_hash();
 	if (net->dev_name_head == NULL)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index dddbacb8f28c..42f1e1c7514f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,7 +27,9 @@ static DEFINE_MUTEX(net_mutex);
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
 
-struct net init_net;
+struct net init_net = {
+	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
+};
 EXPORT_SYMBOL(init_net);
 
 #define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
-- 
cgit v1.2.3


From eea03c20ae38a55405c0865ed9adfccc400e4c8e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 18 Jul 2012 18:15:46 -0700
Subject: Make wait_for_device_probe() also do scsi_complete_async_scans()

Commit a7a20d103994 ("sd: limit the scope of the async probe domain")
make the SCSI device probing run device discovery in it's own async
domain.

However, as a result, the partition detection was no longer synchronized
by async_synchronize_full() (which, despite the name, only synchronizes
the global async space, not all of them).  Which in turn meant that
"wait_for_device_probe()" would not wait for the SCSI partitions to be
parsed.

And "wait_for_device_probe()" was what the boot time init code relied on
for mounting the root filesystem.

Now, most people never noticed this, because not only is it
timing-dependent, but modern distributions all use initrd.  So the root
filesystem isn't actually on a disk at all.  And then before they
actually mount the final disk filesystem, they will have loaded the
scsi-wait-scan module, which not only does the expected
wait_for_device_probe(), but also does scsi_complete_async_scans().

[ Side note: scsi_complete_async_scans() had also been partially broken,
  but that was fixed in commit 43a8d39d0137 ("fix async probe
  regression"), so that same commit a7a20d103994 had actually broken
  setups even if you used scsi-wait-scan explicitly ]

Solve this problem by just moving the scsi_complete_async_scans() call
into wait_for_device_probe().  Everybody who wants to wait for device
probing to finish really wants the SCSI probing to complete, so there's
no reason not to do this.

So now "wait_for_device_probe()" really does what the name implies, and
properly waits for device probing to finish.  This also removes the now
unnecessary extra calls to scsi_complete_async_scans().

Reported-and-tested-by: Artem S. Tashkinov <t.artem@mailcity.com>
Cc: Dan Williams <dan.j.williams@gmail.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: James Bottomley <jbottomley@parallels.com>
Cc: Borislav Petkov <bp@amd64.org>
Cc: linux-scsi <linux-scsi@vger.kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/dd.c             | 2 ++
 drivers/scsi/scsi_wait_scan.c | 5 -----
 include/linux/device.h        | 2 --
 kernel/power/hibernate.c      | 8 --------
 kernel/power/user.c           | 2 --
 5 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index dcb8a6e48692..4b01ab3d2c24 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -24,6 +24,7 @@
 #include <linux/wait.h>
 #include <linux/async.h>
 #include <linux/pm_runtime.h>
+#include <scsi/scsi_scan.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -332,6 +333,7 @@ void wait_for_device_probe(void)
 	/* wait for the known devices to complete their probing */
 	wait_event(probe_waitqueue, atomic_read(&probe_count) == 0);
 	async_synchronize_full();
+	scsi_complete_async_scans();
 }
 EXPORT_SYMBOL_GPL(wait_for_device_probe);
 
diff --git a/drivers/scsi/scsi_wait_scan.c b/drivers/scsi/scsi_wait_scan.c
index ae7814874618..072734538876 100644
--- a/drivers/scsi/scsi_wait_scan.c
+++ b/drivers/scsi/scsi_wait_scan.c
@@ -22,11 +22,6 @@ static int __init wait_scan_init(void)
 	 * and might not yet have reached the scsi async scanning
 	 */
 	wait_for_device_probe();
-	/*
-	 * and then we wait for the actual asynchronous scsi scan
-	 * to finish.
-	 */
-	scsi_complete_async_scans();
 	return 0;
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 161d96241b1b..6de94151ff6f 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -865,8 +865,6 @@ extern int (*platform_notify_remove)(struct device *dev);
 extern struct device *get_device(struct device *dev);
 extern void put_device(struct device *dev);
 
-extern void wait_for_device_probe(void);
-
 #ifdef CONFIG_DEVTMPFS
 extern int devtmpfs_create_node(struct device *dev);
 extern int devtmpfs_delete_node(struct device *dev);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 8b53db38a279..238025f5472e 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -27,7 +27,6 @@
 #include <linux/syscore_ops.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
-#include <scsi/scsi_scan.h>
 
 #include "power.h"
 
@@ -748,13 +747,6 @@ static int software_resume(void)
 			async_synchronize_full();
 		}
 
-		/*
-		 * We can't depend on SCSI devices being available after loading
-		 * one of their modules until scsi_complete_async_scans() is
-		 * called and the resume device usually is a SCSI one.
-		 */
-		scsi_complete_async_scans();
-
 		swsusp_resume_device = name_to_dev_t(resume_file);
 		if (!swsusp_resume_device) {
 			error = -ENODEV;
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 91b0fd021a95..4ed81e74f86f 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -24,7 +24,6 @@
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/freezer.h>
-#include <scsi/scsi_scan.h>
 
 #include <asm/uaccess.h>
 
@@ -84,7 +83,6 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 		 * appear.
 		 */
 		wait_for_device_probe();
-		scsi_complete_async_scans();
 
 		data->swap = -1;
 		data->mode = O_WRONLY;
-- 
cgit v1.2.3


From 893a0574de0c90a4e52c8f7070023b2eb58cd220 Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yuasa@linux-mips.org>
Date: Wed, 18 Jul 2012 14:12:01 -0700
Subject: mips: fix bug.h build regression

Commit 377780887 ("bug.h: need linux/kernel.h for TAINT_WARN.") broke
all MIPS builds:

    CC      arch/mips/kernel/machine_kexec.o
  include/linux/log2.h: In function '__ilog2_u32':
  include/linux/log2.h:34:2: error: implicit declaration of function 'fls' [-Werror=implicit-function-declaration]
  include/linux/log2.h: In function '__ilog2_u64':
  include/linux/log2.h:42:2: error: implicit declaration of function 'fls64' [-Werror=implicit-function-declaration]
  ...

Signed-off-by: Yoichi Yuasa <yuasa@linux-mips.org>
Tested-by: John Crispin <blogic@openwrt.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/include/asm/bitops.h | 1 -
 arch/mips/include/asm/io.h     | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 2e1ad4c652b7..82ad35ce2b45 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -17,7 +17,6 @@
 #include <linux/irqflags.h>
 #include <linux/types.h>
 #include <asm/barrier.h>
-#include <asm/bug.h>
 #include <asm/byteorder.h>		/* sigh ... */
 #include <asm/cpu-features.h>
 #include <asm/sgidefs.h>
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index a58f22998a86..29d9c23c20c7 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -17,6 +17,7 @@
 #include <linux/types.h>
 
 #include <asm/addrspace.h>
+#include <asm/bug.h>
 #include <asm/byteorder.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
-- 
cgit v1.2.3


From b1bdd2eb31309a3b61235613041180cd50be19a0 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Wed, 18 Jul 2012 14:12:04 -0700
Subject: kexec: update URL of kexec homepage

The referenced html file does not exist anymore. Replace the URL with
the current project homepage.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kdump/kdump.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 506c7390c2b9..13f1aa09b938 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -86,7 +86,7 @@ There is also a gitweb interface available at
 http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git
 
 More information about kexec-tools can be found at
-http://www.kernel.org/pub/linux/utils/kernel/kexec/README.html
+http://horms.net/projects/kexec/
 
 3) Unpack the tarball with the tar command, as follows:
 
-- 
cgit v1.2.3


From 25f7fd470bc97bb93d3a674e8c56c4a29063ec97 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 19 Jul 2012 15:59:18 +1000
Subject: md: fix bug in handling of new_data_offset

commit c6563a8c38fde3c1c7fc925a10bde3ca20799301
    md: add possibility to change data-offset for devices.

introduced a 'new_data_offset' attribute which should normally
be the same as 'data_offset', but can be explicitly set to a different
value to allow a reshape operation to move the data.

Unfortunately when the 'data_offset' is explicitly set through
sysfs, the new_data_offset is not also set, so the two would become
out-of-sync incorrectly.

One result of this is that trying to set the 'size' after the
'data_offset' would fail because it is not permitted to set the size
when the 'data_offset' and 'new_data_offset' are different - as that
can be confusing.
Consequently when mdadm tried to do this while assembling an IMSM
array it would fail.

This bug was introduced in 3.5-rc1.

Reported-by: Brian Downing <bdowning@lavos.net>
Bisected-by: Brian Downing <bdowning@lavos.net>
Tested-by: Brian Downing <bdowning@lavos.net>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/md.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index a4c219e3c859..0c1fe4cbce6b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2931,6 +2931,7 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len)
 		 * can be sane */
 		return -EBUSY;
 	rdev->data_offset = offset;
+	rdev->new_data_offset = offset;
 	return len;
 }
 
-- 
cgit v1.2.3


From a05b7ea03d72f36edb0cec05e8893803335c61a0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 19 Jul 2012 15:59:18 +1000
Subject: md: avoid crash when stopping md array races with closing other open
 fds.

md will refuse to stop an array if any other fd (or mounted fs) is
using it.
When any fs is unmounted of when the last open fd is closed all
pending IO will be flushed (e.g. sync_blockdev call in __blkdev_put)
so there will be no pending IO to worry about when the array is
stopped.

However in order to send the STOP_ARRAY ioctl to stop the array one
must first get and open fd on the block device.
If some fd is being used to write to the block device and it is closed
after mdadm open the block device, but before mdadm issues the
STOP_ARRAY ioctl, then there will be no last-close on the md device so
__blkdev_put will not call sync_blockdev.

If this happens, then IO can still be in-flight while md tears down
the array and bad things can happen (use-after-free and subsequent
havoc).

So in the case where do_md_stop is being called from an open file
descriptor, call sync_block after taking the mutex to ensure there
will be no new openers.

This is needed when setting a read-write device to read-only too.

Cc: stable@vger.kernel.org
Reported-by: majianpeng <majianpeng@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/md.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0c1fe4cbce6b..d5ab4493c8be 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3927,8 +3927,8 @@ array_state_show(struct mddev *mddev, char *page)
 	return sprintf(page, "%s\n", array_states[st]);
 }
 
-static int do_md_stop(struct mddev * mddev, int ro, int is_open);
-static int md_set_readonly(struct mddev * mddev, int is_open);
+static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
+static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
 static int do_md_run(struct mddev * mddev);
 static int restart_array(struct mddev *mddev);
 
@@ -3944,14 +3944,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 		/* stopping an active array */
 		if (atomic_read(&mddev->openers) > 0)
 			return -EBUSY;
-		err = do_md_stop(mddev, 0, 0);
+		err = do_md_stop(mddev, 0, NULL);
 		break;
 	case inactive:
 		/* stopping an active array */
 		if (mddev->pers) {
 			if (atomic_read(&mddev->openers) > 0)
 				return -EBUSY;
-			err = do_md_stop(mddev, 2, 0);
+			err = do_md_stop(mddev, 2, NULL);
 		} else
 			err = 0; /* already inactive */
 		break;
@@ -3959,7 +3959,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 		break; /* not supported yet */
 	case readonly:
 		if (mddev->pers)
-			err = md_set_readonly(mddev, 0);
+			err = md_set_readonly(mddev, NULL);
 		else {
 			mddev->ro = 1;
 			set_disk_ro(mddev->gendisk, 1);
@@ -3969,7 +3969,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 	case read_auto:
 		if (mddev->pers) {
 			if (mddev->ro == 0)
-				err = md_set_readonly(mddev, 0);
+				err = md_set_readonly(mddev, NULL);
 			else if (mddev->ro == 1)
 				err = restart_array(mddev);
 			if (err == 0) {
@@ -5352,15 +5352,17 @@ void md_stop(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(md_stop);
 
-static int md_set_readonly(struct mddev *mddev, int is_open)
+static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 {
 	int err = 0;
 	mutex_lock(&mddev->open_mutex);
-	if (atomic_read(&mddev->openers) > is_open) {
+	if (atomic_read(&mddev->openers) > !!bdev) {
 		printk("md: %s still in use.\n",mdname(mddev));
 		err = -EBUSY;
 		goto out;
 	}
+	if (bdev)
+		sync_blockdev(bdev);
 	if (mddev->pers) {
 		__md_stop_writes(mddev);
 
@@ -5382,18 +5384,26 @@ out:
  *   0 - completely stop and dis-assemble array
  *   2 - stop but do not disassemble array
  */
-static int do_md_stop(struct mddev * mddev, int mode, int is_open)
+static int do_md_stop(struct mddev * mddev, int mode,
+		      struct block_device *bdev)
 {
 	struct gendisk *disk = mddev->gendisk;
 	struct md_rdev *rdev;
 
 	mutex_lock(&mddev->open_mutex);
-	if (atomic_read(&mddev->openers) > is_open ||
+	if (atomic_read(&mddev->openers) > !!bdev ||
 	    mddev->sysfs_active) {
 		printk("md: %s still in use.\n",mdname(mddev));
 		mutex_unlock(&mddev->open_mutex);
 		return -EBUSY;
 	}
+	if (bdev)
+		/* It is possible IO was issued on some other
+		 * open file which was closed before we took ->open_mutex.
+		 * As that was not the last close __blkdev_put will not
+		 * have called sync_blockdev, so we must.
+		 */
+		sync_blockdev(bdev);
 
 	if (mddev->pers) {
 		if (mddev->ro)
@@ -5467,7 +5477,7 @@ static void autorun_array(struct mddev *mddev)
 	err = do_md_run(mddev);
 	if (err) {
 		printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
-		do_md_stop(mddev, 0, 0);
+		do_md_stop(mddev, 0, NULL);
 	}
 }
 
@@ -6482,11 +6492,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 			goto done_unlock;
 
 		case STOP_ARRAY:
-			err = do_md_stop(mddev, 0, 1);
+			err = do_md_stop(mddev, 0, bdev);
 			goto done_unlock;
 
 		case STOP_ARRAY_RO:
-			err = md_set_readonly(mddev, 1);
+			err = md_set_readonly(mddev, bdev);
 			goto done_unlock;
 
 		case BLKROSET:
-- 
cgit v1.2.3


From 58e94ae18478c08229626daece2fc108a4a23261 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 19 Jul 2012 15:59:18 +1000
Subject: md/raid1: close some possible races on write errors during resync

commit 4367af556133723d0f443e14ca8170d9447317cb
   md/raid1: clear bad-block record when write succeeds.

Added a 'reschedule_retry' call possibility at the end of
end_sync_write, but didn't add matching code at the end of
sync_request_write.  So if the writes complete very quickly, or
scheduling makes it seem that way, then we can miss rescheduling
the request and the resync could hang.

Also commit 73d5c38a9536142e062c35997b044e89166e063b
    md: avoid races when stopping resync.

Fix a race condition in this same code in end_sync_write but didn't
make the change in sync_request_write.

This patch updates sync_request_write to fix both of those.
Patch is suitable for 3.1 and later kernels.

Reported-by: Alexander Lyakas <alex.bolshoy@gmail.com>
Original-version-by: Alexander Lyakas <alex.bolshoy@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid1.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 240ff3125040..cacd008d6864 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1818,8 +1818,14 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
 
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
 		/* if we're here, all write(s) have completed, so clean up */
-		md_done_sync(mddev, r1_bio->sectors, 1);
-		put_buf(r1_bio);
+		int s = r1_bio->sectors;
+		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+		    test_bit(R1BIO_WriteError, &r1_bio->state))
+			reschedule_retry(r1_bio);
+		else {
+			put_buf(r1_bio);
+			md_done_sync(mddev, s, 1);
+		}
 	}
 }
 
-- 
cgit v1.2.3


From 78d4803f75277f78ab4fc3be7ad462d78f726df9 Mon Sep 17 00:00:00 2001
From: Leonid Yegoshin <yegoshin@mips.com>
Date: Fri, 6 Jul 2012 21:56:01 +0200
Subject: MIPS: Don't panic on 5KEc.

It's a bloody bog standard MIPS64R2 core with just a new PrId ID.  Iow
that essentially means Linux just panics because it doesn't know how to
name the core.

[ralf@linux-mips.org: Split original patch into several smaller patches.]

Signed-off-by: Leonid Yegoshin <yegoshin@mips.com>
Signed-off-by: Steven J. Hill <sjhill@mips.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3792/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cpu.h  | 2 +-
 arch/mips/kernel/cpu-probe.c | 4 ++++
 arch/mips/kernel/traps.c     | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index c64910586b74..95e40c1e8ed1 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -266,7 +266,7 @@ enum cpu_type_enum {
 	/*
 	 * MIPS64 class processors
 	 */
-	CPU_5KC, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
+	CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
 	CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2,
 	CPU_XLR, CPU_XLP,
 
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index aaf39f3eaa51..f4630e1082ab 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -811,6 +811,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_5KC;
 		__cpu_name[cpu] = "MIPS 5Kc";
 		break;
+	case PRID_IMP_5KE:
+		c->cputype = CPU_5KE;
+		__cpu_name[cpu] = "MIPS 5KE";
+		break;
 	case PRID_IMP_20KC:
 		c->cputype = CPU_20KC;
 		__cpu_name[cpu] = "MIPS 20Kc";
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index f985b7292cd9..ce95f2c41f3f 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1249,6 +1249,7 @@ static inline void parity_protection_init(void)
 		break;
 
 	case CPU_5KC:
+	case CPU_5KE:
 		write_c0_ecc(0x80000000);
 		back_to_back_c0_hazard();
 		/* Set the PE bit (bit 31) in the c0_errctl register. */
-- 
cgit v1.2.3


From c022630633624a75b3b58f43dd3c6cc896a56cff Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <sjhill@mips.com>
Date: Fri, 6 Jul 2012 21:56:01 +0200
Subject: MIPS: Refactor 'clear_page' and 'copy_page' functions.

Remove usage of the '__attribute__((alias("...")))' hack that aliased
to integer arrays containing micro-assembled instructions. This hack
breaks when building a microMIPS kernel. It also makes the code much
easier to understand.

[ralf@linux-mips.org: Added back export of the clear_page and copy_page
symbols so certain modules will work again.  Also fixed build with
CONFIG_SIBYTE_DMA_PAGEOPS enabled.]

Signed-off-by: Steven J. Hill <sjhill@mips.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3866/
Acked-by: David Daney <david.daney@cavium.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/mips_ksyms.c |  8 +++++-
 arch/mips/mm/Makefile         |  4 +--
 arch/mips/mm/page-funcs.S     | 50 ++++++++++++++++++++++++++++++++
 arch/mips/mm/page.c           | 67 ++++++++++++-------------------------------
 4 files changed, 77 insertions(+), 52 deletions(-)
 create mode 100644 arch/mips/mm/page-funcs.S

diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c
index 57ba13edb03a..3fc1691110dc 100644
--- a/arch/mips/kernel/mips_ksyms.c
+++ b/arch/mips/kernel/mips_ksyms.c
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1996, 97, 98, 99, 2000, 01, 03, 04, 05 by Ralf Baechle
+ * Copyright (C) 1996, 97, 98, 99, 2000, 01, 03, 04, 05, 12 by Ralf Baechle
  * Copyright (C) 1999, 2000, 01 Silicon Graphics, Inc.
  */
 #include <linux/interrupt.h>
@@ -34,6 +34,12 @@ EXPORT_SYMBOL(memmove);
 
 EXPORT_SYMBOL(kernel_thread);
 
+/*
+ * Functions that operate on entire pages.  Mostly used by memory management.
+ */
+EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
+
 /*
  * Userspace access stuff.
  */
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index 4aa20280613e..fd6203f14f1f 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -3,8 +3,8 @@
 #
 
 obj-y				+= cache.o dma-default.o extable.o fault.o \
-				   gup.o init.o mmap.o page.o tlbex.o \
-				   tlbex-fault.o uasm.o
+				   gup.o init.o mmap.o page.o page-funcs.o \
+				   tlbex.o tlbex-fault.o uasm.o
 
 obj-$(CONFIG_32BIT)		+= ioremap.o pgtable-32.o
 obj-$(CONFIG_64BIT)		+= pgtable-64.o
diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S
new file mode 100644
index 000000000000..48a6b38ff13e
--- /dev/null
+++ b/arch/mips/mm/page-funcs.S
@@ -0,0 +1,50 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Micro-assembler generated clear_page/copy_page functions.
+ *
+ * Copyright (C) 2012  MIPS Technologies, Inc.
+ * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <asm/asm.h>
+#include <asm/regdef.h>
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+#define cpu_clear_page_function_name	clear_page_cpu
+#define cpu_copy_page_function_name	copy_page_cpu
+#else
+#define cpu_clear_page_function_name	clear_page
+#define cpu_copy_page_function_name	copy_page
+#endif
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x058 bytes
+ * R4600 v1.7:				0x05c bytes
+ * R4600 v2.0:				0x060 bytes
+ * With prefetching, 16 word strides	0x120 bytes
+ */
+EXPORT(__clear_page_start)
+LEAF(cpu_clear_page_function_name)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space 288
+END(cpu_clear_page_function_name)
+EXPORT(__clear_page_end)
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x11c bytes
+ * R4600 v1.7:				0x080 bytes
+ * R4600 v2.0:				0x07c bytes
+ * With prefetching, 16 word strides	0x540 bytes
+ */
+EXPORT(__copy_page_start)
+LEAF(cpu_copy_page_function_name)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space 1344
+END(cpu_copy_page_function_name)
+EXPORT(__copy_page_end)
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index cc0b626858b3..98f530e18216 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -6,6 +6,7 @@
  * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
  * Copyright (C) 2007  Maciej W. Rozycki
  * Copyright (C) 2008  Thiemo Seufer
+ * Copyright (C) 2012  MIPS Technologies, Inc.
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -71,45 +72,6 @@ static struct uasm_reloc __cpuinitdata relocs[5];
 #define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
 #define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)
 
-/*
- * Maximum sizes:
- *
- * R4000 128 bytes S-cache:		0x058 bytes
- * R4600 v1.7:				0x05c bytes
- * R4600 v2.0:				0x060 bytes
- * With prefetching, 16 word strides	0x120 bytes
- */
-
-static u32 clear_page_array[0x120 / 4];
-
-#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
-void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
-#else
-void clear_page(void *page) __attribute__((alias("clear_page_array")));
-#endif
-
-EXPORT_SYMBOL(clear_page);
-
-/*
- * Maximum sizes:
- *
- * R4000 128 bytes S-cache:		0x11c bytes
- * R4600 v1.7:				0x080 bytes
- * R4600 v2.0:				0x07c bytes
- * With prefetching, 16 word strides	0x540 bytes
- */
-static u32 copy_page_array[0x540 / 4];
-
-#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
-void
-copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
-#else
-void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
-#endif
-
-EXPORT_SYMBOL(copy_page);
-
-
 static int pref_bias_clear_store __cpuinitdata;
 static int pref_bias_copy_load __cpuinitdata;
 static int pref_bias_copy_store __cpuinitdata;
@@ -282,10 +244,15 @@ static inline void __cpuinit build_clear_pref(u32 **buf, int off)
 		}
 }
 
+extern u32 __clear_page_start;
+extern u32 __clear_page_end;
+extern u32 __copy_page_start;
+extern u32 __copy_page_end;
+
 void __cpuinit build_clear_page(void)
 {
 	int off;
-	u32 *buf = (u32 *)&clear_page_array;
+	u32 *buf = &__clear_page_start;
 	struct uasm_label *l = labels;
 	struct uasm_reloc *r = relocs;
 	int i;
@@ -356,17 +323,17 @@ void __cpuinit build_clear_page(void)
 	uasm_i_jr(&buf, RA);
 	uasm_i_nop(&buf);
 
-	BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));
+	BUG_ON(buf > &__clear_page_end);
 
 	uasm_resolve_relocs(relocs, labels);
 
 	pr_debug("Synthesized clear page handler (%u instructions).\n",
-		 (u32)(buf - clear_page_array));
+		 (u32)(buf - &__clear_page_start));
 
 	pr_debug("\t.set push\n");
 	pr_debug("\t.set noreorder\n");
-	for (i = 0; i < (buf - clear_page_array); i++)
-		pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
+	for (i = 0; i < (buf - &__clear_page_start); i++)
+		pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]);
 	pr_debug("\t.set pop\n");
 }
 
@@ -427,7 +394,7 @@ static inline void build_copy_store_pref(u32 **buf, int off)
 void __cpuinit build_copy_page(void)
 {
 	int off;
-	u32 *buf = (u32 *)&copy_page_array;
+	u32 *buf = &__copy_page_start;
 	struct uasm_label *l = labels;
 	struct uasm_reloc *r = relocs;
 	int i;
@@ -595,21 +562,23 @@ void __cpuinit build_copy_page(void)
 	uasm_i_jr(&buf, RA);
 	uasm_i_nop(&buf);
 
-	BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array));
+	BUG_ON(buf > &__copy_page_end);
 
 	uasm_resolve_relocs(relocs, labels);
 
 	pr_debug("Synthesized copy page handler (%u instructions).\n",
-		 (u32)(buf - copy_page_array));
+		 (u32)(buf - &__copy_page_start));
 
 	pr_debug("\t.set push\n");
 	pr_debug("\t.set noreorder\n");
-	for (i = 0; i < (buf - copy_page_array); i++)
-		pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
+	for (i = 0; i < (buf - &__copy_page_start); i++)
+		pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]);
 	pr_debug("\t.set pop\n");
 }
 
 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+extern void clear_page_cpu(void *page);
+extern void copy_page_cpu(void *to, void *from);
 
 /*
  * Pad descriptors to cacheline, since each is exclusively owned by a
-- 
cgit v1.2.3


From dc34b05fea0cc9a869863b929f37f1e8ce30edf4 Mon Sep 17 00:00:00 2001
From: Douglas Leung <douglas@mips.com>
Date: Thu, 19 Jul 2012 09:11:13 +0200
Subject: MIPS: Fix decoding of c0_config1 for MIPSxx caches with 32 ways per
 set.

This affects certain 4Kc cores.

Signed-off-by: Douglas Leung <douglas@mips.com>
Signed-off-by: Steven J. Hill <sjhill@mips.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3855/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/c-r4k.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index e56efd059189..f092c265dc63 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -977,7 +977,7 @@ static void __cpuinit probe_pcache(void)
 			c->icache.linesz = 2 << lsize;
 		else
 			c->icache.linesz = lsize;
-		c->icache.sets = 64 << ((config1 >> 22) & 7);
+		c->icache.sets = 32 << (((config1 >> 22) + 1) & 7);
 		c->icache.ways = 1 + ((config1 >> 16) & 7);
 
 		icache_size = c->icache.sets *
@@ -997,7 +997,7 @@ static void __cpuinit probe_pcache(void)
 			c->dcache.linesz = 2 << lsize;
 		else
 			c->dcache.linesz= lsize;
-		c->dcache.sets = 64 << ((config1 >> 13) & 7);
+		c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7);
 		c->dcache.ways = 1 + ((config1 >> 7) & 7);
 
 		dcache_size = c->dcache.sets *
-- 
cgit v1.2.3


From 2dd17030c940ef1199a07b095ec79c4660fa8734 Mon Sep 17 00:00:00 2001
From: Leonid Yegoshin <yegoshin@mips.com>
Date: Thu, 19 Jul 2012 09:11:14 +0200
Subject: MIPS: Fix race condition with FPU thread task flag during context
 switch.

[ralf@linux-mips.org: Cosmetic changes; also fixed up r2300_switch.S and
octeon_switch.S which needed similar modifications.]

Signed-off-by: Leonid Yegoshin <yegoshin@mips.com>
Signed-off-by: Steven J. Hill <sjhill@mips.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3784/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/switch_to.h |  6 ++++--
 arch/mips/kernel/octeon_switch.S  |  2 +-
 arch/mips/kernel/r2300_switch.S   | 15 +++------------
 arch/mips/kernel/r4k_switch.S     | 12 +++---------
 4 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 5d33621b5658..4f8ddba8c360 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -22,7 +22,7 @@ struct task_struct;
  * switch_to(n) should switch tasks to task nr n, first
  * checking that n isn't the current task, in which case it does nothing.
  */
-extern asmlinkage void *resume(void *last, void *next, void *next_ti);
+extern asmlinkage void *resume(void *last, void *next, void *next_ti, u32 __usedfpu);
 
 extern unsigned int ll_bit;
 extern struct task_struct *ll_task;
@@ -66,11 +66,13 @@ do {									\
 
 #define switch_to(prev, next, last)					\
 do {									\
+	u32 __usedfpu;							\
 	__mips_mt_fpaff_switch_to(prev);				\
 	if (cpu_has_dsp)						\
 		__save_dsp(prev);					\
 	__clear_software_ll_bit();					\
-	(last) = resume(prev, next, task_thread_info(next));		\
+	__usedfpu = test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU);	\
+	(last) = resume(prev, next, task_thread_info(next), __usedfpu);	\
 } while (0)
 
 #define finish_arch_switch(prev)					\
diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S
index ce89c8061708..0441f54b2a6a 100644
--- a/arch/mips/kernel/octeon_switch.S
+++ b/arch/mips/kernel/octeon_switch.S
@@ -31,7 +31,7 @@
 
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *                     struct thread_info *next_ti)
+ *                     struct thread_info *next_ti, int usedfpu)
  */
 	.align	7
 	LEAF(resume)
diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S
index 293898391e67..9c51be5a163a 100644
--- a/arch/mips/kernel/r2300_switch.S
+++ b/arch/mips/kernel/r2300_switch.S
@@ -43,7 +43,7 @@
 
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *                     struct thread_info *next_ti) )
+ *                     struct thread_info *next_ti, int usedfpu)
  */
 LEAF(resume)
 	mfc0	t1, CP0_STATUS
@@ -51,18 +51,9 @@ LEAF(resume)
 	cpu_save_nonscratch a0
 	sw	ra, THREAD_REG31(a0)
 
-	/*
-	 * check if we need to save FPU registers
-	 */
-	lw	t3, TASK_THREAD_INFO(a0)
-	lw	t0, TI_FLAGS(t3)
-	li	t1, _TIF_USEDFPU
-	and	t2, t0, t1
-	beqz	t2, 1f
-	nor	t1, zero, t1
+	beqz	a3, 1f
 
-	and	t0, t0, t1
-	sw	t0, TI_FLAGS(t3)
+	PTR_L	t3, TASK_THREAD_INFO(a0)
 
 	/*
 	 * clear saved user stack CU1 bit
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 9414f9354469..42d2a3938420 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -41,7 +41,7 @@
 
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *                     struct thread_info *next_ti)
+ *                     struct thread_info *next_ti, int usedfpu)
  */
 	.align	5
 	LEAF(resume)
@@ -53,16 +53,10 @@
 	/*
 	 * check if we need to save FPU registers
 	 */
-	PTR_L	t3, TASK_THREAD_INFO(a0)
-	LONG_L	t0, TI_FLAGS(t3)
-	li	t1, _TIF_USEDFPU
-	and	t2, t0, t1
-	beqz	t2, 1f
-	nor	t1, zero, t1
 
-	and	t0, t0, t1
-	LONG_S	t0, TI_FLAGS(t3)
+	beqz    a3, 1f
 
+	PTR_L	t3, TASK_THREAD_INFO(a0)
 	/*
 	 * clear saved user stack CU1 bit
 	 */
-- 
cgit v1.2.3


From c5de50dada1403e1abc8cd4a8c9a1283c859e0bb Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <sjhill@mips.com>
Date: Thu, 19 Jul 2012 09:11:14 +0200
Subject: MIPS: Malta: Change start address to avoid conflicts.

There are ACPI and SMB devices in the 0x1000..0x1fff address range.

Signed-off-by: Steven J. Hill <sjhill@mips.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3581/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mti-malta/malta-pci.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/mips/mti-malta/malta-pci.c b/arch/mips/mti-malta/malta-pci.c
index bf80921f2f56..916206823d45 100644
--- a/arch/mips/mti-malta/malta-pci.c
+++ b/arch/mips/mti-malta/malta-pci.c
@@ -241,8 +241,9 @@ void __init mips_pcibios_init(void)
 		return;
 	}
 
-	if (controller->io_resource->start < 0x00001000UL)	/* FIXME */
-		controller->io_resource->start = 0x00001000UL;
+	/* Change start address to avoid conflicts with ACPI and SMB devices */
+	if (controller->io_resource->start < 0x00002000UL)
+		controller->io_resource->start = 0x00002000UL;
 
 	iomem_resource.end &= 0xfffffffffULL;			/* 64 GB */
 	ioport_resource.end = controller->io_resource->end;
-- 
cgit v1.2.3


From 7b1c0d26a8e272787f0f9fcc5f3e8531df3b3409 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Thu, 19 Jul 2012 09:11:14 +0200
Subject: MIPS: Properly align the .data..init_task section.

Improper alignment can lead to unbootable systems and/or random
crashes.

[ralf@linux-mips.org: This is a lond standing bug since
6eb10bc9e2deab06630261cd05c4cb1e9a60e980 (kernel.org) rsp.
c422a10917f75fd19fa7fe070aaaa23e384dae6f (lmo) [MIPS: Clean up linker script
using new linker script macros.] so dates back to 2.6.32.]

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org>
Patchwork: https://patchwork.linux-mips.org/patch/3881/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/thread_info.h | 4 ++--
 arch/mips/kernel/vmlinux.lds.S      | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index e2eca7d10598..ca97e0ecb64b 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -60,6 +60,8 @@ struct thread_info {
 register struct thread_info *__current_thread_info __asm__("$28");
 #define current_thread_info()  __current_thread_info
 
+#endif /* !__ASSEMBLY__ */
+
 /* thread information allocation */
 #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT)
 #define THREAD_SIZE_ORDER (1)
@@ -85,8 +87,6 @@ register struct thread_info *__current_thread_info __asm__("$28");
 
 #define STACK_WARN	(THREAD_SIZE / 8)
 
-#endif /* !__ASSEMBLY__ */
-
 #define PREEMPT_ACTIVE		0x10000000
 
 /*
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 924da5eb7031..df243a64f430 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -1,5 +1,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/page.h>
+#include <asm/thread_info.h>
 #include <asm-generic/vmlinux.lds.h>
 
 #undef mips
@@ -72,7 +73,7 @@ SECTIONS
 	.data : {	/* Data */
 		. = . + DATAOFFSET;		/* for CONFIG_MAPPED_KERNEL */
 
-		INIT_TASK_DATA(PAGE_SIZE)
+		INIT_TASK_DATA(THREAD_SIZE)
 		NOSAVE_DATA
 		CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
 		READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
-- 
cgit v1.2.3


From c7b2ec2106b906e9233eefaac310ca8f4ce26934 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 19 Jul 2012 09:11:14 +0200
Subject: MIPS: SMTC: Spelling and grammar corrections.

Extractd from Steven J. Hill's https://patchwork.linux-mips.org/patch/3603/.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smtc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index f5dd38f1d015..b450ea529ddf 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -322,7 +322,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot)
 
 /*
  * Common setup before any secondaries are started
- * Make sure all CPU's are in a sensible state before we boot any of the
+ * Make sure all CPUs are in a sensible state before we boot any of the
  * secondaries.
  *
  * For MIPS MT "SMTC" operation, we set up all TCs, spread as evenly
@@ -340,12 +340,12 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 	/*
 	 * TCContext gets an offset from the base of the IPIQ array
 	 * to be used in low-level code to detect the presence of
-	 * an active IPI queue
+	 * an active IPI queue.
 	 */
 	write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16);
 	/* Bind tc to vpe */
 	write_tc_c0_tcbind(vpe);
-	/* In general, all TCs should have the same cpu_data indications */
+	/* In general, all TCs should have the same cpu_data indications. */
 	memcpy(&cpu_data[cpu], &cpu_data[0], sizeof(struct cpuinfo_mips));
 	/* For 34Kf, start with TC/CPU 0 as sole owner of single FPU context */
 	if (cpu_data[0].cputype == CPU_34K ||
@@ -358,8 +358,8 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 }
 
 /*
- * Tweak to get Count registes in as close a sync as possible.
- * Value seems good for 34K-class cores.
+ * Tweak to get Count registes in as close a sync as possible.  The
+ * value seems good for 34K-class cores.
  */
 
 #define CP0_SKEW 8
-- 
cgit v1.2.3


From b96b62db8cec46693e192f31c2c14147212530fc Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yuasa@linux-mips.org>
Date: Thu, 19 Jul 2012 09:11:15 +0200
Subject: MIPS: BCM47xx: Fix BCMA_DRIVER_PCI_HOSTMODE config dependencies

warning: (BCM47XX_BCMA) selects BCMA_DRIVER_PCI_HOSTMODE which has unmet direct dependencies (BCMA_POSSIBLE && BCMA && MIPS && BCMA_HOST_PCI)
warning: (BCM47XX_BCMA) selects BCMA_DRIVER_PCI_HOSTMODE which has unmet direct dependencies (BCMA_POSSIBLE && BCMA && MIPS && BCMA_HOST_PCI)

Signed-off-by: Yoichi Yuasa <yuasa@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3882/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/bcm47xx/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig
index 6210b8d84109..b311be45a720 100644
--- a/arch/mips/bcm47xx/Kconfig
+++ b/arch/mips/bcm47xx/Kconfig
@@ -21,6 +21,7 @@ config BCM47XX_BCMA
 	select BCMA
 	select BCMA_HOST_SOC
 	select BCMA_DRIVER_MIPS
+	select BCMA_HOST_PCI if PCI
 	select BCMA_DRIVER_PCI_HOSTMODE if PCI
 	default y
 	help
-- 
cgit v1.2.3


From 7ee91de45a9a841ac59d597901e984b859b31bbe Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yuasa@linux-mips.org>
Date: Thu, 19 Jul 2012 09:11:15 +0200
Subject: MIPS: Cavium: Fix duplicate ARCH_SPARSEMEM_ENABLE in kconfig.

Signed-off-by: Yoichi Yuasa <yuasa@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3883/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig               | 1 +
 arch/mips/cavium-octeon/Kconfig | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 09ab87ee6fef..5eb8c932fe53 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1423,6 +1423,7 @@ config CPU_SB1
 config CPU_CAVIUM_OCTEON
 	bool "Cavium Octeon processor"
 	depends on SYS_HAS_CPU_CAVIUM_OCTEON
+	select ARCH_SPARSEMEM_ENABLE
 	select CPU_HAS_PREFETCH
 	select CPU_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_SMP
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index f9e275a50d98..2f4f6d5e05b6 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -82,10 +82,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY
 	help
 	  Lock the kernel's implementation of memcpy() into L2.
 
-config ARCH_SPARSEMEM_ENABLE
-	def_bool y
-	select SPARSEMEM_STATIC
-
 config IOMMU_HELPER
 	bool
 
-- 
cgit v1.2.3


From a586e14f2c4932fd8d4f14f713874f84f36f91c6 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 19 Jul 2012 09:11:15 +0200
Subject: MIPS: Fix typo multipy -> multiply

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/inst.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/inst.h b/arch/mips/include/asm/inst.h
index 7ebfc392e58d..ab84064283db 100644
--- a/arch/mips/include/asm/inst.h
+++ b/arch/mips/include/asm/inst.h
@@ -251,7 +251,7 @@ struct f_format {	/* FPU register format */
 	unsigned int func : 6;
 };
 
-struct ma_format {	/* FPU multipy and add format (MIPS IV) */
+struct ma_format {	/* FPU multiply and add format (MIPS IV) */
 	unsigned int opcode : 6;
 	unsigned int fr : 5;
 	unsigned int ft : 5;
@@ -324,7 +324,7 @@ struct f_format {	/* FPU register format */
 	unsigned int opcode : 6;
 };
 
-struct ma_format {	/* FPU multipy and add format (MIPS IV) */
+struct ma_format {	/* FPU multiply and add format (MIPS IV) */
 	unsigned int fmt : 2;
 	unsigned int func : 4;
 	unsigned int fd : 5;
-- 
cgit v1.2.3


From 5d9fbed18e25c0ce4fe404550fdb46eaa54e52ce Mon Sep 17 00:00:00 2001
From: Leonid Yegoshin <yegoshin@mips.com>
Date: Thu, 19 Jul 2012 09:11:15 +0200
Subject: MIPS: Malta may also be equipped with MIPS64 R2 processors.

Signed-off-by: Leonid Yegoshin <yegoshin@mips.com>
Signed-off-by: Steven J. Hill <sjhill@mips.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3792/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 5eb8c932fe53..b3e10fdd3898 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -288,6 +288,7 @@ config MIPS_MALTA
 	select SYS_HAS_CPU_MIPS32_R1
 	select SYS_HAS_CPU_MIPS32_R2
 	select SYS_HAS_CPU_MIPS64_R1
+	select SYS_HAS_CPU_MIPS64_R2
 	select SYS_HAS_CPU_NEVADA
 	select SYS_HAS_CPU_RM7000
 	select SYS_HAS_EARLY_PRINTK
-- 
cgit v1.2.3


From 5520e426901ccb3e6683132e5b70425a811d7f78 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Thu, 19 Jul 2012 09:11:15 +0200
Subject: MIPS: cmpxchg.h: Add missing include

Fix the following build breakage in v3.4-rc1:

  CC      kernel/irq_work.o
In file included from include/linux/irq_work.h:4:0,
                 from kernel/irq_work.c:10:
include/linux/llist.h: In function 'llist_del_all':
include/linux/llist.h:178:2: error: implicit declaration of function 'BUILD_BUG_ON' [-Werror=implicit-function-declaration]

Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/3568/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cmpxchg.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 285a41fa0b18..eee10dc07ac1 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -8,6 +8,7 @@
 #ifndef __ASM_CMPXCHG_H
 #define __ASM_CMPXCHG_H
 
+#include <linux/bug.h>
 #include <linux/irqflags.h>
 #include <asm/war.h>
 
-- 
cgit v1.2.3


From 4a043d79dc2d082b0df28820d43b96f1cdaf29e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 19 Jul 2012 09:11:16 +0200
Subject: mips: mark const init data with __initconst instead of __initdata
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As long as there is no other non-const variable marked __initdata in the
same compilation unit it doesn't hurt. If there were one however
compilation would fail with

	error: $variablename causes a section type conflict

because a section containing const variables is marked read only and so
cannot contain non-const variables.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: linux-mips@linux-mips.org
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: kernel@pengutronix.de
Patchwork: https://patchwork.linux-mips.org/patch/3565/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/bcm63xx/dev-pcmcia.c         | 4 ++--
 arch/mips/mti-malta/malta-setup.c      | 2 +-
 arch/mips/pci/fixup-mpc30x.c           | 4 ++--
 arch/mips/powertv/asic/asic-calliope.c | 2 +-
 arch/mips/powertv/asic/asic-cronus.c   | 2 +-
 arch/mips/powertv/asic/asic-gaia.c     | 2 +-
 arch/mips/powertv/asic/asic-zeus.c     | 2 +-
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/mips/bcm63xx/dev-pcmcia.c b/arch/mips/bcm63xx/dev-pcmcia.c
index de4d917fd54d..a551bab5ecb9 100644
--- a/arch/mips/bcm63xx/dev-pcmcia.c
+++ b/arch/mips/bcm63xx/dev-pcmcia.c
@@ -79,11 +79,11 @@ static int __init config_pcmcia_cs(unsigned int cs,
 	return ret;
 }
 
-static const __initdata struct {
+static const struct {
 	unsigned int	cs;
 	unsigned int	base;
 	unsigned int	size;
-} pcmcia_cs[3] = {
+} pcmcia_cs[3] __initconst = {
 	{
 		.cs	= MPI_CS_PCMCIA_COMMON,
 		.base	= BCM_PCMCIA_COMMON_BASE_PA,
diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
index b7f37d4982fa..2e28f653f66d 100644
--- a/arch/mips/mti-malta/malta-setup.c
+++ b/arch/mips/mti-malta/malta-setup.c
@@ -111,7 +111,7 @@ static void __init pci_clock_check(void)
 	unsigned int __iomem *jmpr_p =
 		(unsigned int *) ioremap(MALTA_JMPRS_REG, sizeof(unsigned int));
 	int jmpr = (__raw_readl(jmpr_p) >> 2) & 0x07;
-	static const int pciclocks[] __initdata = {
+	static const int pciclocks[] __initconst = {
 		33, 20, 25, 30, 12, 16, 37, 10
 	};
 	int pciclock = pciclocks[jmpr];
diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c
index e08f49cb6875..8e4f8288eca2 100644
--- a/arch/mips/pci/fixup-mpc30x.c
+++ b/arch/mips/pci/fixup-mpc30x.c
@@ -22,13 +22,13 @@
 
 #include <asm/vr41xx/mpc30x.h>
 
-static const int internal_func_irqs[] __initdata = {
+static const int internal_func_irqs[] __initconst = {
 	VRC4173_CASCADE_IRQ,
 	VRC4173_AC97_IRQ,
 	VRC4173_USB_IRQ,
 };
 
-static const int irq_tab_mpc30x[] __initdata = {
+static const int irq_tab_mpc30x[] __initconst = {
  [12] = VRC4173_PCMCIA1_IRQ,
  [13] = VRC4173_PCMCIA2_IRQ,
  [29] = MQ200_IRQ,
diff --git a/arch/mips/powertv/asic/asic-calliope.c b/arch/mips/powertv/asic/asic-calliope.c
index 0a170e0ffeaa..7773f3d956b0 100644
--- a/arch/mips/powertv/asic/asic-calliope.c
+++ b/arch/mips/powertv/asic/asic-calliope.c
@@ -28,7 +28,7 @@
 
 #define CALLIOPE_ADDR(x)	(CALLIOPE_IO_BASE + (x))
 
-const struct register_map calliope_register_map __initdata = {
+const struct register_map calliope_register_map __initconst = {
 	.eic_slow0_strt_add = {.phys = CALLIOPE_ADDR(0x800000)},
 	.eic_cfg_bits = {.phys = CALLIOPE_ADDR(0x800038)},
 	.eic_ready_status = {.phys = CALLIOPE_ADDR(0x80004c)},
diff --git a/arch/mips/powertv/asic/asic-cronus.c b/arch/mips/powertv/asic/asic-cronus.c
index bbc0c122be5e..da076db7b7ed 100644
--- a/arch/mips/powertv/asic/asic-cronus.c
+++ b/arch/mips/powertv/asic/asic-cronus.c
@@ -28,7 +28,7 @@
 
 #define CRONUS_ADDR(x)	(CRONUS_IO_BASE + (x))
 
-const struct register_map cronus_register_map __initdata = {
+const struct register_map cronus_register_map __initconst = {
 	.eic_slow0_strt_add = {.phys = CRONUS_ADDR(0x000000)},
 	.eic_cfg_bits = {.phys = CRONUS_ADDR(0x000038)},
 	.eic_ready_status = {.phys = CRONUS_ADDR(0x00004C)},
diff --git a/arch/mips/powertv/asic/asic-gaia.c b/arch/mips/powertv/asic/asic-gaia.c
index 91dda682752c..47683b370e74 100644
--- a/arch/mips/powertv/asic/asic-gaia.c
+++ b/arch/mips/powertv/asic/asic-gaia.c
@@ -23,7 +23,7 @@
 #include <linux/init.h>
 #include <asm/mach-powertv/asic.h>
 
-const struct register_map gaia_register_map __initdata = {
+const struct register_map gaia_register_map __initconst = {
 	.eic_slow0_strt_add = {.phys = GAIA_IO_BASE + 0x000000},
 	.eic_cfg_bits = {.phys = GAIA_IO_BASE + 0x000038},
 	.eic_ready_status = {.phys = GAIA_IO_BASE + 0x00004C},
diff --git a/arch/mips/powertv/asic/asic-zeus.c b/arch/mips/powertv/asic/asic-zeus.c
index 4a05bb096476..6ff4b10f09da 100644
--- a/arch/mips/powertv/asic/asic-zeus.c
+++ b/arch/mips/powertv/asic/asic-zeus.c
@@ -28,7 +28,7 @@
 
 #define ZEUS_ADDR(x)	(ZEUS_IO_BASE + (x))
 
-const struct register_map zeus_register_map __initdata = {
+const struct register_map zeus_register_map __initconst = {
 	.eic_slow0_strt_add = {.phys = ZEUS_ADDR(0x000000)},
 	.eic_cfg_bits = {.phys = ZEUS_ADDR(0x000038)},
 	.eic_ready_status = {.phys = ZEUS_ADDR(0x00004c)},
-- 
cgit v1.2.3


From ca760ca5238c55cd0e29291c63e35ac6634d385f Mon Sep 17 00:00:00 2001
From: Danny Kukawka <danny.kukawka@bisect.de>
Date: Thu, 19 Jul 2012 09:11:16 +0200
Subject: MIPS: BMIPS: Fix duplicate header inclusion.

Signed-off-by: Danny Kukawka <danny.kukawka@bisect.de>
Cc: Danny Kukawka <dkukawka@suse.de>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/3369/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp-bmips.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 3046e2986006..32fe9254d943 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -15,7 +15,6 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
-#include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/reboot.h>
-- 
cgit v1.2.3


From e909be825212da62433e805f03586015a04f3c78 Mon Sep 17 00:00:00 2001
From: Vincent Wen <vincentwenlinux@gmail.com>
Date: Thu, 19 Jul 2012 09:11:16 +0200
Subject: MIPS: Fix Magic SysRq L kernel crash.

show_backtrace() was passed a NULL pointer which caused paging
request fail. Set to current task as other architectures (ARM,
etc) do when passed a NULL task pointer.

Signed-off-by: Vincent Wen <vincentwenlinux@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: cernekee@gmail.com
Patchwork: https://patchwork.linux-mips.org/patch/3524/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/traps.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index ce95f2c41f3f..9716f057a77b 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -132,6 +132,9 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs)
 	unsigned long ra = regs->regs[31];
 	unsigned long pc = regs->cp0_epc;
 
+	if (!task)
+		task = current;
+
 	if (raw_show_trace || !__kernel_text_address(pc)) {
 		show_raw_backtrace(sp);
 		return;
-- 
cgit v1.2.3


From 6c37c9580409af7dc664bb6af0a85d540d63aeea Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian@openwrt.org>
Date: Thu, 19 Jul 2012 09:13:52 +0200
Subject: MIPS: perf: Fix build error caused by unused
 counters_per_cpu_to_total()

cc1: warnings being treated as errors
arch/mips/kernel/perf_event_mipsxx.c:166: error: 'counters_per_cpu_to_total' defined but not used
make[2]: *** [arch/mips/kernel/perf_event_mipsxx.o] Error 1
make[2]: *** Waiting for unfinished jobs....

It was first introduced by 82091564cfd7ab8def42777a9c662dbf655c5d25 [MIPS:
perf: Add support for 64-bit perf counters.] in 3.2.

Signed-off-by: Florian Fainelli <florian@openwrt.org>
Cc: linux-mips@linux-mips.org
Cc: david.daney@cavium.com
Patchwork: https://patchwork.linux-mips.org/patch/3357/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/perf_event_mipsxx.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index f29099b104c4..eb5e394a4650 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -162,11 +162,6 @@ static unsigned int counters_total_to_per_cpu(unsigned int counters)
 	return counters >> vpe_shift();
 }
 
-static unsigned int counters_per_cpu_to_total(unsigned int counters)
-{
-	return counters << vpe_shift();
-}
-
 #else /* !CONFIG_MIPS_MT_SMP */
 #define vpe_id()	0
 
-- 
cgit v1.2.3


From 38be3c7e77ee9bc8ca34a83506e34809ec36c2a4 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian@openwrt.org>
Date: Thu, 19 Jul 2012 09:13:52 +0200
Subject: MIPS: BCM63XX: Fix BCM6368 IPSec clock bit

The IPsec clock bit is 18 and not 17.

Signed-off-by: Florian Fainelli <florian@openwrt.org>
Cc: linux-mips@linux-mips.org
Cc: mpm@selenic.com
Cc: herbert@gondor.apana.org.au
Patchwork: https://patchwork.linux-mips.org/patch/3323/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
index 94d4faad29a1..fdcd78ca1b03 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
@@ -99,7 +99,7 @@
 #define CKCTL_6368_USBH_CLK_EN		(1 << 15)
 #define CKCTL_6368_DISABLE_GLESS_EN	(1 << 16)
 #define CKCTL_6368_NAND_CLK_EN		(1 << 17)
-#define CKCTL_6368_IPSEC_CLK_EN		(1 << 17)
+#define CKCTL_6368_IPSEC_CLK_EN		(1 << 18)
 
 #define CKCTL_6368_ALL_SAFE_EN		(CKCTL_6368_SWPKT_USB_EN |	\
 					CKCTL_6368_SWPKT_SAR_EN |	\
-- 
cgit v1.2.3


From 68b6352cdcdaa743b38ca5705601f65ef9bd662f Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 19 Jul 2012 09:13:52 +0200
Subject: MIPS: Oprofile: Fix build as a module.

When building oprofile as a module for R10000 or R7000 class processors,
E9000 or MIPSxx class cores since 3572a2c37f667ee49333f8863722b8f43eac506b
[MIPS: make oprofile use cp0_perfcount_irq if it is set] an

ERROR: "cp0_compare_irq" [arch/mips/oprofile/oprofile.ko] undefined!

error will happen.  Fixed by exporting cp0_compare_irq.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/traps.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 9716f057a77b..c3c293543703 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1502,6 +1502,7 @@ extern void flush_tlb_handlers(void);
  * Timer interrupt
  */
 int cp0_compare_irq;
+EXPORT_SYMBOL_GPL(cp0_compare_irq);
 int cp0_compare_irq_shift;
 
 /*
-- 
cgit v1.2.3


From 1bcfecc028686ea32e49b0f4f6e8a665917cb49a Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Thu, 19 Jul 2012 09:13:53 +0200
Subject: MIPS: Octeon: delay enable irq to ->smp_finish()

To prepare for smoothing set_cpu_[active|online]() mess up

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Sergei Shtylyov <sshtylyov@mvista.com>
Cc: David Daney <david.daney@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Patchwork: https://patchwork.linux-mips.org/patch/3845/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 4b93048044eb..ee1fb9f7f517 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -185,7 +185,6 @@ static void __cpuinit octeon_init_secondary(void)
 	octeon_init_cvmcount();
 
 	octeon_irq_setup_secondary();
-	raw_local_irq_enable();
 }
 
 /**
@@ -233,6 +232,7 @@ static void octeon_smp_finish(void)
 
 	/* to generate the first CPU timer interrupt */
 	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
+	local_irq_enable();
 }
 
 /**
-- 
cgit v1.2.3


From 856ac3c6e0c4cb566014edf5fa185b962298db88 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Thu, 19 Jul 2012 09:13:53 +0200
Subject: MIPS: BMIPS: delay irq enable to ->smp_finish()

To prepare for smoothing set_cpu_[active|online]() mess up

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Sergei Shtylyov <sshtylyov@mvista.com>
Cc: David Daney <david.daney@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Patchwork: https://patchwork.linux-mips.org/patch/3846/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp-bmips.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 32fe9254d943..8e393b8443f7 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -196,13 +196,6 @@ static void bmips_init_secondary(void)
 
 	write_c0_brcm_action(ACTION_CLR_IPI(smp_processor_id(), 0));
 #endif
-
-	/* make sure there won't be a timer interrupt for a little while */
-	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
-
-	irq_enable_hazard();
-	set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ1 | IE_IRQ5 | ST0_IE);
-	irq_enable_hazard();
 }
 
 /*
@@ -211,6 +204,13 @@ static void bmips_init_secondary(void)
 static void bmips_smp_finish(void)
 {
 	pr_info("SMP: CPU%d is running\n", smp_processor_id());
+
+	/* make sure there won't be a timer interrupt for a little while */
+	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
+
+	irq_enable_hazard();
+	set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ1 | IE_IRQ5 | ST0_IE);
+	irq_enable_hazard();
 }
 
 /*
-- 
cgit v1.2.3


From 70dc8fa782efa2faa82ecec0f250223e1d773a47 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Thu, 19 Jul 2012 09:13:53 +0200
Subject: MIPS: SMTC: delay irq enable to ->smp_finish()

To prepare for smoothing set_cpu_[active|online]() mess up

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Sergei Shtylyov <sshtylyov@mvista.com>
Cc: David Daney <david.daney@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Patchwork: https://patchwork.linux-mips.org/patch/3847/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smtc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index b450ea529ddf..15b5f3cfd20c 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -615,7 +615,6 @@ void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)
 
 void smtc_init_secondary(void)
 {
-	local_irq_enable();
 }
 
 void smtc_smp_finish(void)
@@ -631,6 +630,8 @@ void smtc_smp_finish(void)
 	if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id))
 		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
 
+	local_irq_enable();
+
 	printk("TC %d going on-line as CPU %d\n",
 		cpu_data[smp_processor_id()].tc_id, smp_processor_id());
 }
-- 
cgit v1.2.3


From 263afbdd1cdd49338c118a3064acee01c69bb501 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Thu, 19 Jul 2012 09:13:53 +0200
Subject: MIPS: Yosemite: delay irq enable to ->smp_finish()

To prepare for smoothing set_cpu_[active|online]() mess up

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Sergei Shtylyov <sshtylyov@mvista.com>
Cc: David Daney <david.daney@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Patchwork: https://patchwork.linux-mips.org/patch/3848/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pmc-sierra/yosemite/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index b71fae231049..5edab2bc6fc0 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -115,11 +115,11 @@ static void yos_send_ipi_mask(const struct cpumask *mask, unsigned int action)
  */
 static void __cpuinit yos_init_secondary(void)
 {
-	set_c0_status(ST0_CO | ST0_IE | ST0_IM);
 }
 
 static void __cpuinit yos_smp_finish(void)
 {
+	set_c0_status(ST0_CO | ST0_IM | ST0_IE);
 }
 
 /* Hook for after all CPUs are online */
-- 
cgit v1.2.3


From 5309bdac7029c7d8659d6653761f3402e17ed1ab Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Thu, 19 Jul 2012 09:13:53 +0200
Subject: MIPS: call ->smp_finish() a little late

We have move irq enable to ->smp_finish. Place ->smp_finish() a little
late to prepare for move set_cpu_online() into start_secondary.
And it's not necessary to call cpu_set(cpu, cpu_callin_map) and
synchronise_count_slave() with irq enabled.

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Sergei Shtylyov <sshtylyov@mvista.com>
Cc: David Daney <david.daney@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Patchwork: https://patchwork.linux-mips.org/patch/3850/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 48650c818040..b181bbf410de 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -122,13 +122,14 @@ asmlinkage __cpuinit void start_secondary(void)
 
 	notify_cpu_starting(cpu);
 
-	mp_ops->smp_finish();
 	set_cpu_sibling_map(cpu);
 
 	cpu_set(cpu, cpu_callin_map);
 
 	synchronise_count_slave();
 
+	mp_ops->smp_finish();
+
 	cpu_idle();
 }
 
-- 
cgit v1.2.3


From b9a09a0660aa9174e489ac531244680971950ef8 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Thu, 19 Jul 2012 09:13:53 +0200
Subject: MIPS: call set_cpu_online() on cpu being brought up with irq disabled

To prevent a problem as commit 5fbd036b [sched: Cleanup cpu_active madness]
and commit 2baab4e9 [sched: Fix select_fallback_rq() vs cpu_active/cpu_online]
try to resolve, move set_cpu_online() to the brought up CPU and with irq
disabled.

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Sergei Shtylyov <sshtylyov@mvista.com>
Cc: David Daney <david.daney@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Patchwork: https://patchwork.linux-mips.org/patch/3851/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index b181bbf410de..eb3e2b112a0d 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -122,6 +122,8 @@ asmlinkage __cpuinit void start_secondary(void)
 
 	notify_cpu_starting(cpu);
 
+	set_cpu_online(cpu, true);
+
 	set_cpu_sibling_map(cpu);
 
 	cpu_set(cpu, cpu_callin_map);
@@ -197,8 +199,6 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	while (!cpu_isset(cpu, cpu_callin_map))
 		udelay(100);
 
-	set_cpu_online(cpu, true);
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From b789ad63ac46b00f0862bdc23fc95556adc3cf2f Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Thu, 19 Jul 2012 09:13:53 +0200
Subject: MIPS: smp: Warn on too early irq enable

Just to catch a potential issue.

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Sergei Shtylyov <sshtylyov@mvista.com>
Cc: David Daney <david.daney@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Patchwork: https://patchwork.linux-mips.org/patch/3852/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index eb3e2b112a0d..1268392f1d27 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -130,6 +130,11 @@ asmlinkage __cpuinit void start_secondary(void)
 
 	synchronise_count_slave();
 
+	/*
+	 * irq will be enabled in ->smp_finish(), enabling it too early
+	 * is dangerous.
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
 	mp_ops->smp_finish();
 
 	cpu_idle();
-- 
cgit v1.2.3


From f2b88d65aa7adaa3996088e86ae497fe705e96f3 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Thu, 19 Jul 2012 09:13:54 +0200
Subject: MIPS: sync-r4k: remove redundant irq operation

Since we have delayed irq enabling to ->smp_finish()

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Sergei Shtylyov <sshtylyov@mvista.com>
Cc: David Daney <david.daney@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/sync-r4k.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c
index 99f913c8d7a6..842d55e411fd 100644
--- a/arch/mips/kernel/sync-r4k.c
+++ b/arch/mips/kernel/sync-r4k.c
@@ -111,7 +111,6 @@ void __cpuinit synchronise_count_master(void)
 void __cpuinit synchronise_count_slave(void)
 {
 	int i;
-	unsigned long flags;
 	unsigned int initcount;
 	int ncpus;
 
@@ -123,8 +122,6 @@ void __cpuinit synchronise_count_slave(void)
 	return;
 #endif
 
-	local_irq_save(flags);
-
 	/*
 	 * Not every cpu is online at the time this gets called,
 	 * so we first wait for the master to say everyone is ready
@@ -154,7 +151,5 @@ void __cpuinit synchronise_count_slave(void)
 	}
 	/* Arrange for an interrupt in a short while */
 	write_c0_compare(read_c0_count() + COUNTON);
-
-	local_irq_restore(flags);
 }
 #undef NR_LOOPS
-- 
cgit v1.2.3


From 3592c3cd061fc717b90126de31912110fe0cae3c Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yuasa@linux-mips.org>
Date: Thu, 19 Jul 2012 07:13:54 +0200
Subject: MIPS: Fix bug.h MIPS build regression

Commit: 3777808873b0c49c5cf27e44c948dfb02675d578 [bug.h: need linux/kernel.h
for TAINT_WARN.] breaks all MIPS builds.

  CC      arch/mips/kernel/machine_kexec.o
In file included from include/linux/kernel.h:20:0,
                 from include/asm-generic/bug.h:35,
                 from /home/yuasa/src/linux/kernel/git/linux-2.6/arch/mips/include/asm/bug.h:41,
                 from /home/yuasa/src/linux/kernel/git/linux-2.6/arch/mips/include/asm/bitops.h:20,
                 from include/linux/bitops.h:22,
                 from include/linux/signal.h:38,
                 from include/linux/elfcore.h:5,
                 from include/linux/kexec.h:60,
                 from arch/mips/kernel/machine_kexec.c:9:
include/linux/log2.h: In function '__ilog2_u32':
include/linux/log2.h:34:2: error: implicit declaration of function 'fls' [-Werror=implicit-function-declaration]
include/linux/log2.h: In function '__ilog2_u64':
include/linux/log2.h:42:2: error: implicit declaration of function 'fls64' [-Werror=implicit-function-declaration]
include/linux/log2.h: In function '__roundup_pow_of_two':
include/linux/log2.h:63:2: error: implicit declaration of function 'fls_long' [-Werror=implicit-function-declaration]
In file included from include/linux/bitops.h:22:0,
                 from include/linux/signal.h:38,
                 from include/linux/elfcore.h:5,
                 from include/linux/kexec.h:60,
                 from arch/mips/kernel/machine_kexec.c:9:
/home/yuasa/src/linux/kernel/git/linux-2.6/arch/mips/include/asm/bitops.h: At top level:
/home/yuasa/src/linux/kernel/git/linux-2.6/arch/mips/include/asm/bitops.h:615:19: error: static declaration of 'fls' follows non-static declaration
include/linux/log2.h:34:9: note: previous implicit declaration of 'fls' was here
In file included from /home/yuasa/src/linux/kernel/git/linux-2.6/arch/mips/include/asm/bitops.h:651:0,
                 from include/linux/bitops.h:22,
                 from include/linux/signal.h:38,
                 from include/linux/elfcore.h:5,
                 from include/linux/kexec.h:60,
                 from arch/mips/kernel/machine_kexec.c:9:
include/asm-generic/bitops/fls64.h:18:28: error: static declaration of 'fls64' follows non-static declaration
include/linux/log2.h:42:9: note: previous implicit declaration of 'fls64' was here
In file included from include/linux/signal.h:38:0,
                 from include/linux/elfcore.h:5,
                 from include/linux/kexec.h:60,
                 from arch/mips/kernel/machine_kexec.c:9:
include/linux/bitops.h:160:24: error: conflicting types for 'fls_long'
include/linux/log2.h:63:16: note: previous implicit declaration of 'fls_long' was here
cc1: all warnings being treated as errors

make[2]: *** [arch/mips/kernel/machine_kexec.o] Error 1

Signed-off-by: Yoichi Yuasa <yuasa@linux-mips.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: yuasa@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: Linuxppc-dev <linuxppc-dev@ozlabs.org>
Cc: Linux MIPS Mailing List <linux-mips@linux-mips.org>
Cc: Linux-sh list <linux-sh@vger.kernel.org>
Cc: Chris Zankel <chris@zankel.net>
Patchwork: https://patchwork.linux-mips.org/patch/4000/
Tested-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/bitops.h | 1 -
 arch/mips/include/asm/io.h     | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 2e1ad4c652b7..82ad35ce2b45 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -17,7 +17,6 @@
 #include <linux/irqflags.h>
 #include <linux/types.h>
 #include <asm/barrier.h>
-#include <asm/bug.h>
 #include <asm/byteorder.h>		/* sigh ... */
 #include <asm/cpu-features.h>
 #include <asm/sgidefs.h>
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index a58f22998a86..29d9c23c20c7 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -17,6 +17,7 @@
 #include <linux/types.h>
 
 #include <asm/addrspace.h>
+#include <asm/bug.h>
 #include <asm/byteorder.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
-- 
cgit v1.2.3


From 85a053fa5f2d67ae5b2968305b16e8d2fe4cdf4d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Thu, 19 Jul 2012 07:13:54 +0200
Subject: MIPS: PCI: Move fixups from __init to __devinit.

Fixups are executed once the pci-device is found which is during boot
process so __init seems fine as long as the platform does not support
hotplug.
However it is possible to remove the PCI bus at run time and have it
rediscovered again via "echo 1 > /sys/bus/pci/rescan" and this will call
the fixups again.

[ralf@linux-mips.org: Made piixirqmap[] in malta_piix_func0_fixup()
__initdata.]

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: linux-mips@linux-mips.org
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mti-malta/malta-pci.c |  2 +-
 arch/mips/pci/fixup-fuloong2e.c | 12 ++++++------
 arch/mips/pci/fixup-lemote2f.c  | 12 ++++++------
 arch/mips/pci/fixup-malta.c     |  6 +++---
 arch/mips/pci/fixup-sb1250.c    |  6 +++---
 arch/mips/pci/ops-tx4927.c      |  2 +-
 arch/mips/pci/pci-ip27.c        |  2 +-
 arch/mips/txx9/generic/pci.c    |  2 +-
 8 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/arch/mips/mti-malta/malta-pci.c b/arch/mips/mti-malta/malta-pci.c
index 916206823d45..284dea54faf5 100644
--- a/arch/mips/mti-malta/malta-pci.c
+++ b/arch/mips/mti-malta/malta-pci.c
@@ -254,7 +254,7 @@ void __init mips_pcibios_init(void)
 }
 
 /* Enable PCI 2.1 compatibility in PIIX4 */
-static void __init quirk_dlcsetup(struct pci_dev *dev)
+static void __devinit quirk_dlcsetup(struct pci_dev *dev)
 {
 	u8 odlc, ndlc;
 	(void) pci_read_config_byte(dev, 0x82, &odlc);
diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c
index d5d4c018fb04..0857ab8c3919 100644
--- a/arch/mips/pci/fixup-fuloong2e.c
+++ b/arch/mips/pci/fixup-fuloong2e.c
@@ -48,7 +48,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }
 
-static void __init loongson2e_nec_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_nec_fixup(struct pci_dev *pdev)
 {
 	unsigned int val;
 
@@ -60,7 +60,7 @@ static void __init loongson2e_nec_fixup(struct pci_dev *pdev)
 	pci_write_config_dword(pdev, 0xe4, 1 << 5);
 }
 
-static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func0_fixup(struct pci_dev *pdev)
 {
 	unsigned char c;
 
@@ -135,7 +135,7 @@ static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev)
 	printk(KERN_INFO"via686b fix: ISA bridge done\n");
 }
 
-static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func1_fixup(struct pci_dev *pdev)
 {
 	printk(KERN_INFO"via686b fix: IDE\n");
 
@@ -168,19 +168,19 @@ static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev)
 	printk(KERN_INFO"via686b fix: IDE done\n");
 }
 
-static void __init loongson2e_686b_func2_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func2_fixup(struct pci_dev *pdev)
 {
 	/* irq routing */
 	pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 10);
 }
 
-static void __init loongson2e_686b_func3_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func3_fixup(struct pci_dev *pdev)
 {
 	/* irq routing */
 	pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 11);
 }
 
-static void __init loongson2e_686b_func5_fixup(struct pci_dev *pdev)
+static void __devinit loongson2e_686b_func5_fixup(struct pci_dev *pdev)
 {
 	unsigned int val;
 	unsigned char c;
diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c
index 4b9768d5d729..a7b917dcf604 100644
--- a/arch/mips/pci/fixup-lemote2f.c
+++ b/arch/mips/pci/fixup-lemote2f.c
@@ -96,21 +96,21 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 }
 
 /* CS5536 SPEC. fixup */
-static void __init loongson_cs5536_isa_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_isa_fixup(struct pci_dev *pdev)
 {
 	/* the uart1 and uart2 interrupt in PIC is enabled as default */
 	pci_write_config_dword(pdev, PCI_UART1_INT_REG, 1);
 	pci_write_config_dword(pdev, PCI_UART2_INT_REG, 1);
 }
 
-static void __init loongson_cs5536_ide_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_ide_fixup(struct pci_dev *pdev)
 {
 	/* setting the mutex pin as IDE function */
 	pci_write_config_dword(pdev, PCI_IDE_CFG_REG,
 			       CS5536_IDE_FLASH_SIGNATURE);
 }
 
-static void __init loongson_cs5536_acc_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_acc_fixup(struct pci_dev *pdev)
 {
 	/* enable the AUDIO interrupt in PIC  */
 	pci_write_config_dword(pdev, PCI_ACC_INT_REG, 1);
@@ -118,14 +118,14 @@ static void __init loongson_cs5536_acc_fixup(struct pci_dev *pdev)
 	pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0xc0);
 }
 
-static void __init loongson_cs5536_ohci_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_ohci_fixup(struct pci_dev *pdev)
 {
 	/* enable the OHCI interrupt in PIC */
 	/* THE OHCI, EHCI, UDC, OTG are shared with interrupt in PIC */
 	pci_write_config_dword(pdev, PCI_OHCI_INT_REG, 1);
 }
 
-static void __init loongson_cs5536_ehci_fixup(struct pci_dev *pdev)
+static void __devinit loongson_cs5536_ehci_fixup(struct pci_dev *pdev)
 {
 	u32 hi, lo;
 
@@ -137,7 +137,7 @@ static void __init loongson_cs5536_ehci_fixup(struct pci_dev *pdev)
 	pci_write_config_dword(pdev, PCI_EHCI_FLADJ_REG, 0x2000);
 }
 
-static void __init loongson_nec_fixup(struct pci_dev *pdev)
+static void __devinit loongson_nec_fixup(struct pci_dev *pdev)
 {
 	unsigned int val;
 
diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c
index 0f48498bc231..70073c98ed32 100644
--- a/arch/mips/pci/fixup-malta.c
+++ b/arch/mips/pci/fixup-malta.c
@@ -49,10 +49,10 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }
 
-static void __init malta_piix_func0_fixup(struct pci_dev *pdev)
+static void __devinit malta_piix_func0_fixup(struct pci_dev *pdev)
 {
 	unsigned char reg_val;
-	static int piixirqmap[16] __initdata = {  /* PIIX PIRQC[A:D] irq mappings */
+	static int piixirqmap[16] __devinitdata = {  /* PIIX PIRQC[A:D] irq mappings */
 		0,  0, 	0,  3,
 		4,  5,  6,  7,
 		0,  9, 10, 11,
@@ -83,7 +83,7 @@ static void __init malta_piix_func0_fixup(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0,
 	 malta_piix_func0_fixup);
 
-static void __init malta_piix_func1_fixup(struct pci_dev *pdev)
+static void __devinit malta_piix_func1_fixup(struct pci_dev *pdev)
 {
 	unsigned char reg_val;
 
diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c
index f0bb9146e6c0..d02900a72916 100644
--- a/arch/mips/pci/fixup-sb1250.c
+++ b/arch/mips/pci/fixup-sb1250.c
@@ -15,7 +15,7 @@
  * Set the BCM1250, etc. PCI host bridge's TRDY timeout
  * to the finite max.
  */
-static void __init quirk_sb1250_pci(struct pci_dev *dev)
+static void __devinit quirk_sb1250_pci(struct pci_dev *dev)
 {
 	pci_write_config_byte(dev, 0x40, 0xff);
 }
@@ -25,7 +25,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI,
 /*
  * The BCM1250, etc. PCI/HT bridge reports as a host bridge.
  */
-static void __init quirk_sb1250_ht(struct pci_dev *dev)
+static void __devinit quirk_sb1250_ht(struct pci_dev *dev)
 {
 	dev->class = PCI_CLASS_BRIDGE_PCI << 8;
 }
@@ -35,7 +35,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_HT,
 /*
  * Set the SP1011 HT/PCI bridge's TRDY timeout to the finite max.
  */
-static void __init quirk_sp1011(struct pci_dev *dev)
+static void __devinit quirk_sp1011(struct pci_dev *dev)
 {
 	pci_write_config_byte(dev, 0x64, 0xff);
 }
diff --git a/arch/mips/pci/ops-tx4927.c b/arch/mips/pci/ops-tx4927.c
index a1e7e6d80c8c..bc13e29d2bb3 100644
--- a/arch/mips/pci/ops-tx4927.c
+++ b/arch/mips/pci/ops-tx4927.c
@@ -495,7 +495,7 @@ irqreturn_t tx4927_pcierr_interrupt(int irq, void *dev_id)
 }
 
 #ifdef CONFIG_TOSHIBA_FPCIB0
-static void __init tx4927_quirk_slc90e66_bridge(struct pci_dev *dev)
+static void __devinit tx4927_quirk_slc90e66_bridge(struct pci_dev *dev)
 {
 	struct tx4927_pcic_reg __iomem *pcicptr = pci_bus_to_pcicptr(dev->bus);
 
diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c
index 0fbe4c0c170a..fdc24440294c 100644
--- a/arch/mips/pci/pci-ip27.c
+++ b/arch/mips/pci/pci-ip27.c
@@ -212,7 +212,7 @@ static inline void pci_enable_swapping(struct pci_dev *dev)
 	bridge->b_widget.w_tflush;	/* Flush */
 }
 
-static void __init pci_fixup_ioc3(struct pci_dev *d)
+static void __devinit pci_fixup_ioc3(struct pci_dev *d)
 {
 	pci_disable_swapping(d);
 }
diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c
index 682efb0c108d..64eb71b15280 100644
--- a/arch/mips/txx9/generic/pci.c
+++ b/arch/mips/txx9/generic/pci.c
@@ -269,7 +269,7 @@ txx9_i8259_irq_setup(int irq)
 	return err;
 }
 
-static void __init quirk_slc90e66_bridge(struct pci_dev *dev)
+static void __devinit quirk_slc90e66_bridge(struct pci_dev *dev)
 {
 	int irq;	/* PCI/ISA Bridge interrupt */
 	u8 reg_64;
-- 
cgit v1.2.3


From e9a09aed3eff8b1706e4bc24e8b3b16283797353 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@enac.fr>
Date: Tue, 19 Jun 2012 14:39:54 +0200
Subject: HID: hid-multitouch: add support for Zytronic panels

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@enac.fr>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig          | 1 +
 drivers/hid/hid-ids.h        | 3 +++
 drivers/hid/hid-multitouch.c | 5 +++++
 3 files changed, 9 insertions(+)

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index bef04c192768..3fda8c87f02c 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -386,6 +386,7 @@ config HID_MULTITOUCH
 	  - Unitec Panels
 	  - XAT optical touch panels
 	  - Xiroku optical touch panels
+	  - Zytronic touch panels
 
 	  If unsure, say N.
 
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index fc0c68e4b9ed..5e4168e7ed1c 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -805,6 +805,9 @@
 #define USB_VENDOR_ID_ZYDACRON	0x13EC
 #define USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL	0x0006
 
+#define USB_VENDOR_ID_ZYTRONIC		0x14c8
+#define USB_DEVICE_ID_ZYTRONIC_ZXY100	0x0005
+
 #define USB_VENDOR_ID_PRIMAX	0x0461
 #define USB_DEVICE_ID_PRIMAX_KEYBOARD	0x4e05
 
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 6e3332a99976..76479246d4ee 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1048,6 +1048,11 @@ static const struct hid_device_id mt_devices[] = {
 		MT_USB_DEVICE(USB_VENDOR_ID_XIROKU,
 			USB_DEVICE_ID_XIROKU_CSR2) },
 
+	/* Zytronic panels */
+	{ .driver_data = MT_CLS_SERIAL,
+		MT_USB_DEVICE(USB_VENDOR_ID_ZYTRONIC,
+			USB_DEVICE_ID_ZYTRONIC_ZXY100) },
+
 	/* Generic MT device */
 	{ HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) },
 	{ }
-- 
cgit v1.2.3


From 380e99fc44d79bc35af9ff1d3316ef4027ce775e Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <elezegarcia@gmail.com>
Date: Wed, 18 Jul 2012 10:05:26 -0300
Subject: cx25821: Remove bad strcpy to read-only char*

The strcpy was being used to set the name of the board.  Since the
destination char* was read-only and the name is set statically at
compile time; this was both wrong and redundant.

The type of char* is changed to const char* to prevent future errors.

Reported-by: Radek Masin <radek@masin.eu>
Signed-off-by: Ezequiel Garcia <elezegarcia@gmail.com>
[ Taking directly due to vacations   - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/media/video/cx25821/cx25821-core.c | 3 ---
 drivers/media/video/cx25821/cx25821.h      | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/media/video/cx25821/cx25821-core.c b/drivers/media/video/cx25821/cx25821-core.c
index 83c1aa6b2e6c..f11f6f07e915 100644
--- a/drivers/media/video/cx25821/cx25821-core.c
+++ b/drivers/media/video/cx25821/cx25821-core.c
@@ -904,9 +904,6 @@ static int cx25821_dev_setup(struct cx25821_dev *dev)
 	list_add_tail(&dev->devlist, &cx25821_devlist);
 	mutex_unlock(&cx25821_devlist_mutex);
 
-	strcpy(cx25821_boards[UNKNOWN_BOARD].name, "unknown");
-	strcpy(cx25821_boards[CX25821_BOARD].name, "cx25821");
-
 	if (dev->pci->device != 0x8210) {
 		pr_info("%s(): Exiting. Incorrect Hardware device = 0x%02x\n",
 			__func__, dev->pci->device);
diff --git a/drivers/media/video/cx25821/cx25821.h b/drivers/media/video/cx25821/cx25821.h
index b9aa801b00a7..029f2934a6d8 100644
--- a/drivers/media/video/cx25821/cx25821.h
+++ b/drivers/media/video/cx25821/cx25821.h
@@ -187,7 +187,7 @@ enum port {
 };
 
 struct cx25821_board {
-	char *name;
+	const char *name;
 	enum port porta;
 	enum port portb;
 	enum port portc;
-- 
cgit v1.2.3


From c6727932cfdb13501108b16c38463c09d5ec7a74 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Date: Sat, 14 Jul 2012 14:33:09 +0300
Subject: UBIFS: fix a bug in empty space fix-up

UBIFS has a feature called "empty space fix-up" which is a quirk to work-around
limitations of dumb flasher programs. Namely, of those flashers that are unable
to skip NAND pages full of 0xFFs while flashing, resulting in empty space at
the end of half-filled eraseblocks to be unusable for UBIFS. This feature is
relatively new (introduced in v3.0).

The fix-up routine (fixup_free_space()) is executed only once at the very first
mount if the superblock has the 'space_fixup' flag set (can be done with -F
option of mkfs.ubifs). It basically reads all the UBIFS data and metadata and
writes it back to the same LEB. The routine assumes the image is pristine and
does not have anything in the journal.

There was a bug in 'fixup_free_space()' where it fixed up the log incorrectly.
All but one LEB of the log of a pristine file-system are empty. And one
contains just a commit start node. And 'fixup_free_space()' just unmapped this
LEB, which resulted in wiping the commit start node. As a result, some users
were unable to mount the file-system next time with the following symptom:

UBIFS error (pid 1): replay_log_leb: first log node at LEB 3:0 is not CS node
UBIFS error (pid 1): replay_log_leb: log error detected while replaying the log at LEB 3:0

The root-cause of this bug was that 'fixup_free_space()' wrongly assumed
that the beginning of empty space in the log head (c->lhead_offs) was known
on mount. However, it is not the case - it was always 0. UBIFS does not store
in it the master node and finds out by scanning the log on every mount.

The fix is simple - just pass commit start node size instead of 0 to
'fixup_leb()'.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Cc: stable@vger.kernel.org [v3.0+]
Reported-by: Iwo Mergler <Iwo.Mergler@netcommwireless.com>
Tested-by: Iwo Mergler <Iwo.Mergler@netcommwireless.com>
Reported-by: James Nute <newten82@gmail.com>
---
 fs/ubifs/sb.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index ef3d1ba6d992..15e2fc5aa60b 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -718,8 +718,12 @@ static int fixup_free_space(struct ubifs_info *c)
 		lnum = ubifs_next_log_lnum(c, lnum);
 	}
 
-	/* Fixup the current log head */
-	err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
+	/*
+	 * Fixup the log head which contains the only a CS node at the
+	 * beginning.
+	 */
+	err = fixup_leb(c, c->lhead_lnum,
+			ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
 	if (err)
 		goto out;
 
-- 
cgit v1.2.3


From 9ff19309a9623f2963ac5a136782ea4d8b5d67fb Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Fri, 8 Jun 2012 01:19:07 +0300
Subject: ore: Fix NFS crash by supporting any unaligned RAID IO

In RAID_5/6 We used to not permit an IO that it's end
byte is not stripe_size aligned and spans more than one stripe.
.i.e the caller must check if after submission the actual
transferred bytes is shorter, and would need to resubmit
a new IO with the remainder.

Exofs supports this, and NFS was supposed to support this
as well with it's short write mechanism. But late testing has
exposed a CRASH when this is used with none-RPC layout-drivers.

The change at NFS is deep and risky, in it's place the fix
at ORE to lift the limitation is actually clean and simple.
So here it is below.

The principal here is that in the case of unaligned IO on
both ends, beginning and end, we will send two read requests
one like old code, before the calculation of the first stripe,
and also a new site, before the calculation of the last stripe.
If any "boundary" is aligned or the complete IO is within a single
stripe. we do a single read like before.

The code is clean and simple by splitting the old _read_4_write
into 3 even parts:
1._read_4_write_first_stripe
2. _read_4_write_last_stripe
3. _read_4_write_execute

And calling 1+3 at the same place as before. 2+3 before last
stripe, and in the case of all in a single stripe then 1+2+3
is preformed additively.

Why did I not think of it before. Well I had a strike of
genius because I have stared at this code for 2 years, and did
not find this simple solution, til today. Not that I did not try.

This solution is much better for NFS than the previous supposedly
solution because the short write was dealt  with out-of-band after
IO_done, which would cause for a seeky IO pattern where as in here
we execute in order. At both solutions we do 2 separate reads, only
here we do it within a single IO request. (And actually combine two
writes into a single submission)

NFS/exofs code need not change since the ORE API communicates the new
shorter length on return, what will happen is that this case would not
occur anymore.

hurray!!

[Stable this is an NFS bug since 3.2 Kernel should apply cleanly]
CC: Stable Tree <stable@kernel.org>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/ore_raid.c | 67 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index d222c77cfa1b..fff2070c6751 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
  * ios->sp2d[p][*], xor is calculated the same way. These pages are
  * allocated/freed and don't go through cache
  */
-static int _read_4_write(struct ore_io_state *ios)
+static int _read_4_write_first_stripe(struct ore_io_state *ios)
 {
-	struct ore_io_state *ios_read;
 	struct ore_striping_info read_si;
 	struct __stripe_pages_2d *sp2d = ios->sp2d;
 	u64 offset = ios->si.first_stripe_start;
-	u64 last_stripe_end;
-	unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
-	unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
-	int ret;
+	unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
 
 	if (offset == ios->offset) /* Go to start collect $200 */
 		goto read_last_stripe;
@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
 	min_p = _sp2d_min_pg(sp2d);
 	max_p = _sp2d_max_pg(sp2d);
 
+	ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
+		   offset, ios->offset, min_p, max_p);
+
 	for (c = 0; ; c++) {
 		ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
 		read_si.obj_offset += min_p * PAGE_SIZE;
@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
 	}
 
 read_last_stripe:
+	return 0;
+}
+
+static int _read_4_write_last_stripe(struct ore_io_state *ios)
+{
+	struct ore_striping_info read_si;
+	struct __stripe_pages_2d *sp2d = ios->sp2d;
+	u64 offset;
+	u64 last_stripe_end;
+	unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
+	unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
+
 	offset = ios->offset + ios->length;
 	if (offset % PAGE_SIZE)
 		_add_to_r4w_last_page(ios, &offset);
@@ -527,15 +538,15 @@ read_last_stripe:
 	c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
 		       ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
 
-	BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
-	/* unaligned IO must be within a single stripe */
-
 	if (min_p == sp2d->pages_in_unit) {
 		/* Didn't do it yet */
 		min_p = _sp2d_min_pg(sp2d);
 		max_p = _sp2d_max_pg(sp2d);
 	}
 
+	ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
+		   offset, last_stripe_end, min_p, max_p);
+
 	while (offset < last_stripe_end) {
 		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
 
@@ -568,6 +579,15 @@ read_last_stripe:
 	}
 
 read_it:
+	return 0;
+}
+
+static int _read_4_write_execute(struct ore_io_state *ios)
+{
+	struct ore_io_state *ios_read;
+	unsigned i;
+	int ret;
+
 	ios_read = ios->ios_read_4_write;
 	if (!ios_read)
 		return 0;
@@ -591,6 +611,8 @@ read_it:
 	}
 
 	_mark_read4write_pages_uptodate(ios_read, ret);
+	ore_put_io_state(ios_read);
+	ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
 	return 0;
 }
 
@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 			/* If first stripe, Read in all read4write pages
 			 * (if needed) before we calculate the first parity.
 			 */
-			_read_4_write(ios);
+			_read_4_write_first_stripe(ios);
 		}
+		if (!cur_len) /* If last stripe r4w pages of last stripe */
+			_read_4_write_last_stripe(ios);
+		_read_4_write_execute(ios);
 
 		for (i = 0; i < num_pages; i++) {
 			pages[i] = _raid_page_alloc();
@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 
 int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
 {
-	struct ore_layout *layout = ios->layout;
-
 	if (ios->parity_pages) {
+		struct ore_layout *layout = ios->layout;
 		unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
-		unsigned stripe_size = ios->si.bytes_in_stripe;
-		u64 last_stripe, first_stripe;
 
 		if (_sp2d_alloc(pages_in_unit, layout->group_width,
 				layout->parity, &ios->sp2d)) {
 			return -ENOMEM;
 		}
-
-		/* Round io down to last full strip */
-		first_stripe = div_u64(ios->offset, stripe_size);
-		last_stripe = div_u64(ios->offset + ios->length, stripe_size);
-
-		/* If an IO spans more then a single stripe it must end at
-		 * a stripe boundary. The reminder at the end is pushed into the
-		 * next IO.
-		 */
-		if (last_stripe != first_stripe) {
-			ios->length = last_stripe * stripe_size - ios->offset;
-
-			BUG_ON(!ios->length);
-			ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
-					PAGE_SIZE;
-			ios->si.length = ios->length; /*make it consistent */
-		}
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 62b62ad873f2accad9222a4d7ffbe1e93f6714c1 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Fri, 8 Jun 2012 04:30:40 +0300
Subject: ore: Remove support of partial IO request (NFS crash)

Do to OOM situations the ore might fail to allocate all resources
needed for IO of the full request. If some progress was possible
it would proceed with a partial/short request, for the sake of
forward progress.

Since this crashes NFS-core and exofs is just fine without it just
remove this contraption, and fail.

TODO:
	Support real forward progress with some reserved allocations
	of resources, such as mem pools and/or bio_sets

[Bug since 3.2 Kernel]
CC: Stable Tree <stable@kernel.org>
CC: Benny Halevy <bhalevy@tonian.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/ore.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 49cf230554a2..24a49d47e935 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 out:
 	ios->numdevs = devs_in_group;
 	ios->pages_consumed = cur_pg;
-	if (unlikely(ret)) {
-		if (length == ios->length)
-			return ret;
-		else
-			ios->length -= length;
-	}
-	return 0;
+	return ret;
 }
 
 int ore_create(struct ore_io_state *ios)
-- 
cgit v1.2.3


From 537632e0a54a5355cdd0330911d18c3b773f9cf7 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Wed, 11 Jul 2012 15:27:13 +0300
Subject: ore: Unlock r4w pages in exact reverse order of locking

The read-4-write pages are locked in address ascending order.
But where unlocked in a way easiest for coding. Fix that,
locks should be released in opposite order of locking, .i.e
descending address order.

I have not hit this dead-lock. It was found by inspecting the
dbug print-outs. I suspect there is an higher lock at caller that
protects us, but fix it regardless.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/exofs/ore_raid.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index fff2070c6751..5f376d14fdcc 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
 {
 	unsigned data_devs = sp2d->data_devs;
 	unsigned group_width = data_devs + sp2d->parity;
-	unsigned p;
+	int p, c;
 
 	if (!sp2d->needed)
 		return;
 
-	for (p = 0; p < sp2d->pages_in_unit; p++) {
-		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
-
-		if (_1ps->write_count < group_width) {
-			unsigned c;
+	for (c = data_devs - 1; c >= 0; --c)
+		for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
+			struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
 
-			for (c = 0; c < data_devs; c++)
-				if (_1ps->page_is_read[c]) {
-					struct page *page = _1ps->pages[c];
+			if (_1ps->page_is_read[c]) {
+				struct page *page = _1ps->pages[c];
 
-					r4w->put_page(priv, page);
-					_1ps->page_is_read[c] = false;
-				}
+				r4w->put_page(priv, page);
+				_1ps->page_is_read[c] = false;
+			}
 		}
 
+	for (p = 0; p < sp2d->pages_in_unit; p++) {
+		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
+
 		memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));
 		_1ps->write_count = 0;
 		_1ps->tx = NULL;
-- 
cgit v1.2.3


From 9909d45a8557455ca5f8ee7af0f253debc851f1a Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Fri, 8 Jun 2012 05:29:40 +0300
Subject: pnfs-obj: don't leak objio_state if ore_write/read fails

[Bug since 3.2 Kernel]
CC: Stable Tree <stable@kernel.org>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/objlayout/objio_osd.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index b47277baebab..86d7595aca8f 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
 	objios->ios->done = _read_done;
 	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
 		rdata->args.offset, rdata->args.count);
-	return ore_read(objios->ios);
+	ret = ore_read(objios->ios);
+	if (unlikely(ret))
+		objio_free_result(&objios->oir);
+	return ret;
 }
 
 /*
@@ -539,8 +542,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
 	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
 		wdata->args.offset, wdata->args.count);
 	ret = ore_write(objios->ios);
-	if (unlikely(ret))
+	if (unlikely(ret)) {
+		objio_free_result(&objios->oir);
 		return ret;
+	}
 
 	if (objios->sync)
 		_write_done(objios->ios, objios);
-- 
cgit v1.2.3


From c999ff68029ebd0f56ccae75444f640f6d5a27d2 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Fri, 8 Jun 2012 02:02:30 +0300
Subject: pnfs-obj: Fix __r4w_get_page when offset is beyond i_size

It is very common for the end of the file to be unaligned on
stripe size. But since we know it's beyond file's end then
the XOR should be preformed with all zeros.

Old code used to just read zeros out of the OSD devices, which is a great
waist. But what scares me more about this situation is that, we now have
pages attached to the file's mapping that are beyond i_size. I don't
like the kind of bugs this calls for.

Fix both birds, by returning a global zero_page, if offset is beyond
i_size.

TODO:
	Change the API to ->__r4w_get_page() so a NULL can be
	returned without being considered as error, since XOR API
	treats NULL entries as zero_pages.

[Bug since 3.2. Should apply the same way to all Kernels since]
CC: Stable Tree <stable@kernel.org>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/objlayout/objio_osd.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 86d7595aca8f..f50d3e8d6f22 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -489,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 	struct nfs_write_data *wdata = objios->oir.rpcdata;
 	struct address_space *mapping = wdata->header->inode->i_mapping;
 	pgoff_t index = offset / PAGE_SIZE;
-	struct page *page = find_get_page(mapping, index);
+	struct page *page;
+	loff_t i_size = i_size_read(wdata->header->inode);
 
+	if (offset >= i_size) {
+		*uptodate = true;
+		dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
+		return ZERO_PAGE(0);
+	}
+
+	page = find_get_page(mapping, index);
 	if (!page) {
 		page = find_or_create_page(mapping, index, GFP_NOFS);
 		if (unlikely(!page)) {
@@ -510,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 
 static void __r4w_put_page(void *priv, struct page *page)
 {
-	dprintk("%s: index=0x%lx\n", __func__, page->index);
-	page_cache_release(page);
+	dprintk("%s: index=0x%lx\n", __func__,
+		(page == ZERO_PAGE(0)) ? -1UL : page->index);
+	if (ZERO_PAGE(0) != page)
+		page_cache_release(page);
 	return;
 }
 
-- 
cgit v1.2.3


From 751f188dd5ab95b3f2b5f2f467c38aae5a2877eb Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 20 Jul 2012 14:25:03 +0100
Subject: dm raid1: fix crash with mirror recovery and discard

This patch fixes a crash when a discard request is sent during mirror
recovery.

Firstly, some background.  Generally, the following sequence happens during
mirror synchronization:
- function do_recovery is called
- do_recovery calls dm_rh_recovery_prepare
- dm_rh_recovery_prepare uses a semaphore to limit the number
  simultaneously recovered regions (by default the semaphore value is 1,
  so only one region at a time is recovered)
- dm_rh_recovery_prepare calls __rh_recovery_prepare,
  __rh_recovery_prepare asks the log driver for the next region to
  recover. Then, it sets the region state to DM_RH_RECOVERING. If there
  are no pending I/Os on this region, the region is added to
  quiesced_regions list. If there are pending I/Os, the region is not
  added to any list. It is added to the quiesced_regions list later (by
  dm_rh_dec function) when all I/Os finish.
- when the region is on quiesced_regions list, there are no I/Os in
  flight on this region. The region is popped from the list in
  dm_rh_recovery_start function. Then, a kcopyd job is started in the
  recover function.
- when the kcopyd job finishes, recovery_complete is called. It calls
  dm_rh_recovery_end. dm_rh_recovery_end adds the region to
  recovered_regions or failed_recovered_regions list (depending on
  whether the copy operation was successful or not).

The above mechanism assumes that if the region is in DM_RH_RECOVERING
state, no new I/Os are started on this region. When I/O is started,
dm_rh_inc_pending is called, which increases reg->pending count. When
I/O is finished, dm_rh_dec is called. It decreases reg->pending count.
If the count is zero and the region was in DM_RH_RECOVERING state,
dm_rh_dec adds it to the quiesced_regions list.

Consequently, if we call dm_rh_inc_pending/dm_rh_dec while the region is
in DM_RH_RECOVERING state, it could be added to quiesced_regions list
multiple times or it could be added to this list when kcopyd is copying
data (it is assumed that the region is not on any list while kcopyd does
its jobs). This results in memory corruption and crash.

There already exist bypasses for REQ_FLUSH requests: REQ_FLUSH requests
do not belong to any region, so they are always added to the sync list
in do_writes. dm_rh_inc_pending does not increase count for REQ_FLUSH
requests. In mirror_end_io, dm_rh_dec is never called for REQ_FLUSH
requests. These bypasses avoid the crash possibility described above.

These bypasses were improperly implemented for REQ_DISCARD when
the mirror target gained discard support in commit
5fc2ffeabb9ee0fc0e71ff16b49f34f0ed3d05b4 (dm raid1: support discard).

In do_writes, REQ_DISCARD requests is always added to the sync queue and
immediately dispatched (even if the region is in DM_RH_RECOVERING).  However,
dm_rh_inc and dm_rh_dec is called for REQ_DISCARD resusts.  So it violates the
rule that no I/Os are started on DM_RH_RECOVERING regions, and causes the list
corruption described above.

This patch changes it so that REQ_DISCARD requests follow the same path
as REQ_FLUSH. This avoids the crash.

Reference: https://bugzilla.redhat.com/837607

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-raid1.c       | 2 +-
 drivers/md/dm-region-hash.c | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d039de8322f0..ea16984c0f08 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1214,7 +1214,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 	 * We need to dec pending if this was a write.
 	 */
 	if (rw == WRITE) {
-		if (!(bio->bi_rw & REQ_FLUSH))
+		if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
 			dm_rh_dec(ms->rh, map_context->ll);
 		return error;
 	}
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 7771ed212182..69732e03eb34 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
 		return;
 	}
 
+	if (bio->bi_rw & REQ_DISCARD)
+		return;
+
 	/* We must inform the log that the sync count has changed. */
 	log->type->set_region_sync(log, region, 0);
 
@@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
 	struct bio *bio;
 
 	for (bio = bios->head; bio; bio = bio->bi_next) {
-		if (bio->bi_rw & REQ_FLUSH)
+		if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
 			continue;
 		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
 	}
-- 
cgit v1.2.3


From 650d2a06b4fe1cc1d218c20e256650f68bf0ca31 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 20 Jul 2012 14:25:05 +0100
Subject: dm thin: do not send discards to shared blocks

When process_discard receives a partial discard that doesn't cover a
full block, it sends this discard down to that block. Unfortunately, the
block can be shared and the discard would corrupt the other snapshots
sharing this block.

This patch detects block sharing and ends the discard with success when
sending it to the shared block.

The above change means that if the device supports discard it can't be
guaranteed that a discard request zeroes data. Therefore, we set
ti->discard_zeroes_data_unsupported.

Thin target discard support with this bug arrived in commit
104655fd4dcebd50068ef30253a001da72e3a081 (dm thin: support discards).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-thin.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index ce59824fb414..68694da0d21d 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1245,7 +1245,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 
 			cell_release_singleton(cell, bio);
 			cell_release_singleton(cell2, bio);
-			remap_and_issue(tc, bio, lookup_result.block);
+			if ((!lookup_result.shared) && pool->pf.discard_passdown)
+				remap_and_issue(tc, bio, lookup_result.block);
+			else
+				bio_endio(bio, 0);
 		}
 		break;
 
@@ -2628,6 +2631,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = 1;
 		ti->num_discard_requests = 1;
+		ti->discard_zeroes_data_unsupported = 1;
 	}
 
 	dm_put(pool_md);
-- 
cgit v1.2.3


From 7c8d3a42fe1c58a7e8fd3f6a013e7d7b474ff931 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 20 Jul 2012 14:25:07 +0100
Subject: dm raid1: set discard_zeroes_data_unsupported

We can't guarantee that REQ_DISCARD on dm-mirror zeroes the data even if
the underlying disks support zero on discard.  So this patch sets
ti->discard_zeroes_data_unsupported.

For example, if the mirror is in the process of resynchronizing, it may
happen that kcopyd reads a piece of data, then discard is sent on the
same area and then kcopyd writes the piece of data to another leg.
Consequently, the data is not zeroed.

The flag was made available by commit 983c7db347db8ce2d8453fd1d89b7a4bb6920d56
(dm crypt: always disable discard_zeroes_data).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-raid1.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ea16984c0f08..b58b7a33914a 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1084,6 +1084,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->split_io = dm_rh_get_region_size(ms->rh);
 	ti->num_flush_requests = 1;
 	ti->num_discard_requests = 1;
+	ti->discard_zeroes_data_unsupported = 1;
 
 	ms->kmirrord_wq = alloc_workqueue("kmirrord",
 					  WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
-- 
cgit v1.2.3


From bc792e612e78a24ae0b30cc5b85f2368379ba4d4 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Fri, 20 Jul 2012 17:27:37 -0700
Subject: kdb: Revive dmesg command

The kgdb dmesg command is broken after the printk rework.  The old logic
in kdb code makes no sense in terms of current printk/logging storage
format, and KDB simply hangs forever.

This patch revives the command by switching to kmsg_dumper iterator.

The code is now much more simpler and shorter.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/debug/kdb/kdb_main.c | 91 ++++++++++++++++-----------------------------
 1 file changed, 33 insertions(+), 58 deletions(-)

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 67b847dfa2bb..df17c935d3c6 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -14,6 +14,7 @@
 #include <linux/ctype.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/kmsg_dump.h>
 #include <linux/reboot.h>
 #include <linux/sched.h>
 #include <linux/sysrq.h>
@@ -2040,8 +2041,15 @@ static int kdb_env(int argc, const char **argv)
  */
 static int kdb_dmesg(int argc, const char **argv)
 {
-	char *syslog_data[4], *start, *end, c = '\0', *p;
-	int diag, logging, logsize, lines = 0, adjust = 0, n;
+	int diag;
+	int logging;
+	int lines = 0;
+	int adjust = 0;
+	int n = 0;
+	int skip = 0;
+	struct kmsg_dumper dumper = { .active = 1 };
+	size_t len;
+	char buf[201];
 
 	if (argc > 2)
 		return KDB_ARGCOUNT;
@@ -2064,22 +2072,10 @@ static int kdb_dmesg(int argc, const char **argv)
 		kdb_set(2, setargs);
 	}
 
-	/* syslog_data[0,1] physical start, end+1.  syslog_data[2,3]
-	 * logical start, end+1. */
-	kdb_syslog_data(syslog_data);
-	if (syslog_data[2] == syslog_data[3])
-		return 0;
-	logsize = syslog_data[1] - syslog_data[0];
-	start = syslog_data[2];
-	end = syslog_data[3];
-#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0])
-	for (n = 0, p = start; p < end; ++p) {
-		c = *KDB_WRAP(p);
-		if (c == '\n')
-			++n;
-	}
-	if (c != '\n')
-		++n;
+	kmsg_dump_rewind(&dumper);
+	while (kmsg_dump_get_line(&dumper, 1, NULL, 0, NULL))
+		n++;
+
 	if (lines < 0) {
 		if (adjust >= n)
 			kdb_printf("buffer only contains %d lines, nothing "
@@ -2087,21 +2083,11 @@ static int kdb_dmesg(int argc, const char **argv)
 		else if (adjust - lines >= n)
 			kdb_printf("buffer only contains %d lines, last %d "
 				   "lines printed\n", n, n - adjust);
-		if (adjust) {
-			for (; start < end && adjust; ++start) {
-				if (*KDB_WRAP(start) == '\n')
-					--adjust;
-			}
-			if (start < end)
-				++start;
-		}
-		for (p = start; p < end && lines; ++p) {
-			if (*KDB_WRAP(p) == '\n')
-				++lines;
-		}
-		end = p;
+		skip = adjust;
+		lines = abs(lines);
 	} else if (lines > 0) {
-		int skip = n - (adjust + lines);
+		skip = n - lines - adjust;
+		lines = abs(lines);
 		if (adjust >= n) {
 			kdb_printf("buffer only contains %d lines, "
 				   "nothing printed\n", n);
@@ -2112,35 +2098,24 @@ static int kdb_dmesg(int argc, const char **argv)
 			kdb_printf("buffer only contains %d lines, first "
 				   "%d lines printed\n", n, lines);
 		}
-		for (; start < end && skip; ++start) {
-			if (*KDB_WRAP(start) == '\n')
-				--skip;
-		}
-		for (p = start; p < end && lines; ++p) {
-			if (*KDB_WRAP(p) == '\n')
-				--lines;
-		}
-		end = p;
+	} else {
+		lines = n;
 	}
-	/* Do a line at a time (max 200 chars) to reduce protocol overhead */
-	c = '\n';
-	while (start != end) {
-		char buf[201];
-		p = buf;
-		if (KDB_FLAG(CMD_INTERRUPT))
-			return 0;
-		while (start < end && (c = *KDB_WRAP(start)) &&
-		       (p - buf) < sizeof(buf)-1) {
-			++start;
-			*p++ = c;
-			if (c == '\n')
-				break;
+
+	if (skip >= n || skip < 0)
+		return 0;
+
+	kmsg_dump_rewind(&dumper);
+	while (kmsg_dump_get_line(&dumper, 1, buf, sizeof(buf), &len)) {
+		if (skip) {
+			skip--;
+			continue;
 		}
-		*p = '\0';
-		kdb_printf("%s", buf);
+		if (!lines--)
+			break;
+
+		kdb_printf("%.*s\n", (int)len - 1, buf);
 	}
-	if (c != '\n')
-		kdb_printf("\n");
 
 	return 0;
 }
-- 
cgit v1.2.3


From 1b499d05eecbe04969516717a8e15afb6ad80689 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Fri, 20 Jul 2012 17:27:54 -0700
Subject: printk: Remove kdb_syslog_data

The function is no longer needed, so remove it.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/debug/kdb/kdb_private.h |  1 -
 kernel/printk.c                | 15 ---------------
 2 files changed, 16 deletions(-)

diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 47c4e56e513b..392ec6a25844 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -205,7 +205,6 @@ extern char kdb_grep_string[];
 extern int kdb_grep_leading;
 extern int kdb_grep_trailing;
 extern char *kdb_cmds[];
-extern void kdb_syslog_data(char *syslog_data[]);
 extern unsigned long kdb_task_state_string(const char *);
 extern char kdb_task_state_char (const struct task_struct *);
 extern unsigned long kdb_task_state(const struct task_struct *p,
diff --git a/kernel/printk.c b/kernel/printk.c
index 177fa49357a5..c8129678dfbf 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1192,21 +1192,6 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
 	return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
 }
 
-#ifdef	CONFIG_KGDB_KDB
-/* kdb dmesg command needs access to the syslog buffer.  do_syslog()
- * uses locks so it cannot be used during debugging.  Just tell kdb
- * where the start and end of the physical and logical logs are.  This
- * is equivalent to do_syslog(3).
- */
-void kdb_syslog_data(char *syslog_data[4])
-{
-	syslog_data[0] = log_buf;
-	syslog_data[1] = log_buf + log_buf_len;
-	syslog_data[2] = log_buf + log_first_idx;
-	syslog_data[3] = log_buf + log_next_idx;
-}
-#endif	/* CONFIG_KGDB_KDB */
-
 static bool __read_mostly ignore_loglevel;
 
 static int __init ignore_loglevel_setup(char *str)
-- 
cgit v1.2.3


From 533827c921c34310f63e859e1d6d0feec439657d Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Fri, 20 Jul 2012 17:28:07 -0700
Subject: printk: Implement some unlocked kmsg_dump functions

If used from KDB, the locked variants are prone to deadlocks (suppose we
got to the debugger w/ the logbuf lock held).

So, we have to implement a few routines that grab no logbuf lock.

Yet we don't need these functions in modules, so we don't export them.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmsg_dump.h | 16 +++++++++++
 kernel/printk.c           | 68 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 71 insertions(+), 13 deletions(-)

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index d6bd50110ec2..2e7a1e032c71 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -55,12 +55,17 @@ struct kmsg_dumper {
 #ifdef CONFIG_PRINTK
 void kmsg_dump(enum kmsg_dump_reason reason);
 
+bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
+			       char *line, size_t size, size_t *len);
+
 bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
 			char *line, size_t size, size_t *len);
 
 bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 			  char *buf, size_t size, size_t *len);
 
+void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper);
+
 void kmsg_dump_rewind(struct kmsg_dumper *dumper);
 
 int kmsg_dump_register(struct kmsg_dumper *dumper);
@@ -71,6 +76,13 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason)
 {
 }
 
+static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper,
+					     bool syslog, const char *line,
+					     size_t size, size_t *len)
+{
+	return false;
+}
+
 static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
 				const char *line, size_t size, size_t *len)
 {
@@ -83,6 +95,10 @@ static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 	return false;
 }
 
+static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
+{
+}
+
 static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper)
 {
 }
diff --git a/kernel/printk.c b/kernel/printk.c
index c8129678dfbf..ac4bc9e79465 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -2510,7 +2510,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
 }
 
 /**
- * kmsg_dump_get_line - retrieve one kmsg log line
+ * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version)
  * @dumper: registered kmsg dumper
  * @syslog: include the "<4>" prefixes
  * @line: buffer to copy the line to
@@ -2525,11 +2525,12 @@ void kmsg_dump(enum kmsg_dump_reason reason)
  *
  * A return value of FALSE indicates that there are no more records to
  * read.
+ *
+ * The function is similar to kmsg_dump_get_line(), but grabs no locks.
  */
-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
-			char *line, size_t size, size_t *len)
+bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
+			       char *line, size_t size, size_t *len)
 {
-	unsigned long flags;
 	struct log *msg;
 	size_t l = 0;
 	bool ret = false;
@@ -2537,7 +2538,6 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
 	if (!dumper->active)
 		goto out;
 
-	raw_spin_lock_irqsave(&logbuf_lock, flags);
 	if (dumper->cur_seq < log_first_seq) {
 		/* messages are gone, move to first available one */
 		dumper->cur_seq = log_first_seq;
@@ -2545,10 +2545,8 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
 	}
 
 	/* last entry */
-	if (dumper->cur_seq >= log_next_seq) {
-		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+	if (dumper->cur_seq >= log_next_seq)
 		goto out;
-	}
 
 	msg = log_from_idx(dumper->cur_idx);
 	l = msg_print_text(msg, 0, syslog, line, size);
@@ -2556,12 +2554,41 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
 	dumper->cur_idx = log_next(dumper->cur_idx);
 	dumper->cur_seq++;
 	ret = true;
-	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 out:
 	if (len)
 		*len = l;
 	return ret;
 }
+
+/**
+ * kmsg_dump_get_line - retrieve one kmsg log line
+ * @dumper: registered kmsg dumper
+ * @syslog: include the "<4>" prefixes
+ * @line: buffer to copy the line to
+ * @size: maximum size of the buffer
+ * @len: length of line placed into buffer
+ *
+ * Start at the beginning of the kmsg buffer, with the oldest kmsg
+ * record, and copy one record into the provided buffer.
+ *
+ * Consecutive calls will return the next available record moving
+ * towards the end of the buffer with the youngest messages.
+ *
+ * A return value of FALSE indicates that there are no more records to
+ * read.
+ */
+bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+			char *line, size_t size, size_t *len)
+{
+	unsigned long flags;
+	bool ret;
+
+	raw_spin_lock_irqsave(&logbuf_lock, flags);
+	ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
+	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+
+	return ret;
+}
 EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
 
 /**
@@ -2663,6 +2690,24 @@ out:
 }
 EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
 
+/**
+ * kmsg_dump_rewind_nolock - reset the interator (unlocked version)
+ * @dumper: registered kmsg dumper
+ *
+ * Reset the dumper's iterator so that kmsg_dump_get_line() and
+ * kmsg_dump_get_buffer() can be called again and used multiple
+ * times within the same dumper.dump() callback.
+ *
+ * The function is similar to kmsg_dump_rewind(), but grabs no locks.
+ */
+void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
+{
+	dumper->cur_seq = clear_seq;
+	dumper->cur_idx = clear_idx;
+	dumper->next_seq = log_next_seq;
+	dumper->next_idx = log_next_idx;
+}
+
 /**
  * kmsg_dump_rewind - reset the interator
  * @dumper: registered kmsg dumper
@@ -2676,10 +2721,7 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&logbuf_lock, flags);
-	dumper->cur_seq = clear_seq;
-	dumper->cur_idx = clear_idx;
-	dumper->next_seq = log_next_seq;
-	dumper->next_idx = log_next_idx;
+	kmsg_dump_rewind_nolock(dumper);
 	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 }
 EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
-- 
cgit v1.2.3


From c064da47144b11be4697a4611f640086a663016a Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Fri, 20 Jul 2012 17:28:25 -0700
Subject: kdb: Switch to nolock variants of kmsg_dump functions

The locked variants are prone to deadlocks (suppose we got to the
debugger w/ the logbuf lock held), so let's switch to nolock variants.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/debug/kdb/kdb_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index df17c935d3c6..1f91413edb87 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2072,8 +2072,8 @@ static int kdb_dmesg(int argc, const char **argv)
 		kdb_set(2, setargs);
 	}
 
-	kmsg_dump_rewind(&dumper);
-	while (kmsg_dump_get_line(&dumper, 1, NULL, 0, NULL))
+	kmsg_dump_rewind_nolock(&dumper);
+	while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL))
 		n++;
 
 	if (lines < 0) {
@@ -2105,8 +2105,8 @@ static int kdb_dmesg(int argc, const char **argv)
 	if (skip >= n || skip < 0)
 		return 0;
 
-	kmsg_dump_rewind(&dumper);
-	while (kmsg_dump_get_line(&dumper, 1, buf, sizeof(buf), &len)) {
+	kmsg_dump_rewind_nolock(&dumper);
+	while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) {
 		if (skip) {
 			skip--;
 			continue;
-- 
cgit v1.2.3


From bff9d1865640dcb4d9711dcc50714e9a8b859453 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 21 Jul 2012 20:24:52 +0200
Subject: Remove SYSTEM_SUSPEND_DISK system state

The SYSTEM_SUSPEND_DISK system state is never used, so drop it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e07f5e0c5df4..604382143bcf 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -377,7 +377,6 @@ extern enum system_states {
 	SYSTEM_HALT,
 	SYSTEM_POWER_OFF,
 	SYSTEM_RESTART,
-	SYSTEM_SUSPEND_DISK,
 } system_state;
 
 #define TAINT_PROPRIETARY_MODULE	0
-- 
cgit v1.2.3


From 28a33cbc24e4256c143dce96c7d93bf423229f92 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 21 Jul 2012 13:58:29 -0700
Subject: Linux 3.5

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index aa8e315f26ef..4bb09e1b1230 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Saber-toothed Squirrel
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3


From 36d93d88a5396baa135f8bcde7b8501dfe3b8e53 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 22 Jun 2012 16:25:19 +0200
Subject: Revert "x86/early_printk: Replace obsolete simple_strtoul() usage
 with kstrtoint()"

This reverts commit fbd24153c48b8425b09c161a020483cd77da870e.

This commit is subtly buggy: kstrto*int() can return an error but
it's not checked in every path. simple_strtoul() on the other hand
could not fail, so this patch subtly intruduces new failure modes.

Signed-off-by: Shuah Khan <shuahkhan@gmail.com>
Link: http://lkml.kernel.org/r/1338424803.3569.5.camel@lorien2
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/early_printk.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 5e4771266f1a..9b9f18b49918 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -119,7 +119,7 @@ static __init void early_serial_init(char *s)
 	unsigned char c;
 	unsigned divisor;
 	unsigned baud = DEFAULT_BAUD;
-	ssize_t ret;
+	char *e;
 
 	if (*s == ',')
 		++s;
@@ -127,14 +127,14 @@ static __init void early_serial_init(char *s)
 	if (*s) {
 		unsigned port;
 		if (!strncmp(s, "0x", 2)) {
-			ret = kstrtoint(s, 16, &early_serial_base);
+			early_serial_base = simple_strtoul(s, &e, 16);
 		} else {
 			static const int __initconst bases[] = { 0x3f8, 0x2f8 };
 
 			if (!strncmp(s, "ttyS", 4))
 				s += 4;
-			ret = kstrtouint(s, 10, &port);
-			if (ret || port > 1)
+			port = simple_strtoul(s, &e, 10);
+			if (port > 1 || s == e)
 				port = 0;
 			early_serial_base = bases[port];
 		}
@@ -149,8 +149,8 @@ static __init void early_serial_init(char *s)
 	outb(0x3, early_serial_base + MCR);	/* DTR + RTS */
 
 	if (*s) {
-		ret = kstrtouint(s, 0, &baud);
-		if (ret || baud == 0)
+		baud = simple_strtoul(s, &e, 0);
+		if (baud == 0 || s == e)
 			baud = DEFAULT_BAUD;
 	}
 
-- 
cgit v1.2.3