From 151766fce8bee0e3e6076c8b829f9fcc0a2412ae Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Sat, 28 Apr 2012 14:30:40 +0800 Subject: Revert "x86/platform: Add a wallclock_init func to x86_platforms ops" This reverts commit cf8ff6b6ab0e99dd3058852f4ec76a6140abadec. Just found this commit is a function duplicatation of commit 6b617e22 "x86/platform: Add a wallclock_init func to x86_init.timers ops". Let's revert it and sorry for the noise. Signed-off-by: Feng Tang Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Jacob Pan Cc: Alan Cox Cc: Dirk Brandewie Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/x86_init.h | 2 -- arch/x86/kernel/setup.c | 2 -- arch/x86/kernel/x86_init.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c090af10ac7d..42d2ae18dab2 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -156,7 +156,6 @@ struct x86_cpuinit_ops { /** * struct x86_platform_ops - platform specific runtime functions * @calibrate_tsc: calibrate TSC - * @wallclock_init: init the wallclock device * @get_wallclock: get time from HW clock like RTC etc. * @set_wallclock: set time back to HW clock * @is_untracked_pat_range exclude from PAT logic @@ -167,7 +166,6 @@ struct x86_cpuinit_ops { */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); - void (*wallclock_init)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); void (*iommu_shutdown)(void); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 366c688d619e..58a07b10812c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1027,8 +1027,6 @@ void __init setup_arch(char **cmdline_p) x86_init.timers.wallclock_init(); - x86_platform.wallclock_init(); - mcheck_init(); arch_init_ideal_nops(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 35c5e543f550..9f3167e891ef 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -29,7 +29,6 @@ void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_pgd_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } -void wallclock_init_noop(void) { } /* * The platform setup functions are preset with the default functions @@ -101,7 +100,6 @@ static int default_i8042_detect(void) { return 1; }; struct x86_platform_ops x86_platform = { .calibrate_tsc = native_calibrate_tsc, - .wallclock_init = wallclock_init_noop, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, .iommu_shutdown = iommu_shutdown_noop, -- cgit v1.2.3 From f841d792e38f75f5e25b0b66f7b5d235d180a735 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 30 Mar 2012 23:11:35 +0800 Subject: x86: Return IRQ_SET_MASK_OK_NOCOPY from irq affinity functions The interrupt chip irq_set_affinity() functions copy the affinity mask to irq_data->affinity but return 0, i.e. IRQ_SET_MASK_OK. IRQ_SET_MASK_OK causes the core code to do another redundant copy. Return IRQ_SET_MASK_OK_NOCOPY to avoid this. Signed-off-by: Jiang Liu Cc: Suresh Siddha Cc: Yinghai Lu Cc: Naga Chumbalkar Cc: Jacob Pan Cc: Cliff Wickman Cc: Jiang Liu Cc: Keping Chen Link: http://lkml.kernel.org/r/1333120296-13563-4-git-send-email-jiang.liu@huawei.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 9 +++++---- arch/x86/platform/uv/uv_irq.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ac96561d1a99..bce2001b2644 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2270,6 +2270,7 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, /* Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); __target_IO_APIC_irq(irq, dest, data->chip_data); + ret = IRQ_SET_MASK_OK_NOCOPY; } raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; @@ -3092,7 +3093,7 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) __write_msi_msg(data->msi_desc, &msg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } #endif /* CONFIG_SMP */ @@ -3214,7 +3215,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, dmar_msi_write(irq, &msg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } #endif /* CONFIG_SMP */ @@ -3267,7 +3268,7 @@ static int hpet_msi_set_affinity(struct irq_data *data, hpet_msi_write(data->handler_data, &msg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } #endif /* CONFIG_SMP */ @@ -3340,7 +3341,7 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) return -1; target_ht_irq(data->irq, dest, cfg->vector); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } #endif diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index f25c2765a5c9..a22c41656b52 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -222,7 +222,7 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, if (cfg->move_in_progress) send_cleanup_vector(cfg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } /* -- cgit v1.2.3 From 4ee0444bebf3d0399c93841d98826ade193e6330 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Tue, 29 May 2012 15:15:56 +0800 Subject: hexagon: SMP: Remove call to ipi_call_lock()/ipi_call_unlock() ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock protects only the call_function data structure itself, but it's completely unrelated to cpu_online_mask. The mask to which the IPIs are sent is calculated before call_function.lock is taken in smp_call_function_many(), so the locking around set_cpu_online() is pointless and can be removed. [ tglx: Massaged changelog ] Signed-off-by: Yong Zhang Cc: ralf@linux-mips.org Cc: sshtylyov@mvista.com Cc: david.daney@cavium.com Cc: nikunj@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: axboe@kernel.dk Cc: Richard Kuo Cc: linux-hexagon@vger.kernel.org Link: http://lkml.kernel.org/r/1338275765-3217-2-git-send-email-yong.zhang0@gmail.com Acked-by: Srivatsa S. Bhat Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- arch/hexagon/kernel/smp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index f7264621e58d..149fbefc1a4d 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -180,9 +180,7 @@ void __cpuinit start_secondary(void) notify_cpu_starting(cpu); - ipi_call_lock(); set_cpu_online(cpu, true); - ipi_call_unlock(); local_irq_enable(); -- cgit v1.2.3 From 4cbd19fcf9d49f28766415b96eb6201bc263d817 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Tue, 29 May 2012 15:15:57 +0800 Subject: mn10300: SMP: Remove call to ipi_call_lock()/ipi_call_unlock() ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock protects only the call_function data structure itself, but it's completely unrelated to cpu_online_mask. The mask to which the IPIs are sent is calculated before call_function.lock is taken in smp_call_function_many(), so the locking around set_cpu_online() is pointless and can be removed. [ tglx: Massaged changelog ] Signed-off-by: Yong Zhang Cc: ralf@linux-mips.org Cc: sshtylyov@mvista.com Cc: david.daney@cavium.com Cc: nikunj@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: axboe@kernel.dk Cc: peterz@infradead.org Cc: David Howells Cc: Koichi Yasutake Cc: linux-am33-list@redhat.com Link: http://lkml.kernel.org/r/1338275765-3217-3-git-send-email-yong.zhang0@gmail.com Acked-by: Srivatsa S. Bhat Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- arch/mn10300/kernel/smp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index 090d35d36973..e62c223e4c45 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -876,9 +876,7 @@ static void __init smp_online(void) notify_cpu_starting(cpu); - ipi_call_lock(); set_cpu_online(cpu, true); - ipi_call_unlock(); local_irq_enable(); } -- cgit v1.2.3 From ac4216d6922f90c459d531e051c832dadde3a8f7 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Tue, 29 May 2012 15:15:58 +0800 Subject: parisc: Smp: remove call to ipi_call_lock()/ipi_call_unlock() ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock protects only the call_function data structure itself, but it's completely unrelated to cpu_online_mask. The mask to which the IPIs are sent is calculated before call_function.lock is taken in smp_call_function_many(), so the locking around set_cpu_online() is pointless and can be removed. [ tglx: Massaged changelog ] Signed-off-by: Yong Zhang Cc: ralf@linux-mips.org Cc: sshtylyov@mvista.com Cc: david.daney@cavium.com Cc: nikunj@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: axboe@kernel.dk Cc: peterz@infradead.org Cc: James E.J. Bottomley Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Link: http://lkml.kernel.org/r/1338275765-3217-4-git-send-email-yong.zhang0@gmail.com Acked-by: Srivatsa S. Bhat Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- arch/parisc/kernel/smp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index a47828d31fe6..6266730efd61 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -300,9 +300,7 @@ smp_cpu_init(int cpunum) notify_cpu_starting(cpunum); - ipi_call_lock(); set_cpu_online(cpunum, true); - ipi_call_unlock(); /* Initialise the idle task for this CPU */ atomic_inc(&init_mm.mm_count); -- cgit v1.2.3 From 46ce7fbfdbe0be6aaf0e816331aac08a74a00e5a Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Tue, 29 May 2012 15:15:59 +0800 Subject: S390: Smp: remove call to ipi_call_lock()/ipi_call_unlock() ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock protects only the call_function data structure itself, but it's completely unrelated to cpu_online_mask. The mask to which the IPIs are sent is calculated before call_function.lock is taken in smp_call_function_many(), so the locking around set_cpu_online() is pointless and can be removed. [ tglx: Massaged changelog ] Signed-off-by: Yong Zhang Cc: ralf@linux-mips.org Cc: sshtylyov@mvista.com Cc: david.daney@cavium.com Cc: nikunj@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: axboe@kernel.dk Cc: peterz@infradead.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux390@de.ibm.com Cc: linux-s390@vger.kernel.org Link: http://lkml.kernel.org/r/1338275765-3217-5-git-send-email-yong.zhang0@gmail.com Acked-by: Srivatsa S. Bhat Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- arch/s390/kernel/smp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 15cca26ccb6c..8dca9c248ac7 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -717,9 +717,7 @@ static void __cpuinit smp_start_secondary(void *cpuvoid) init_cpu_vtimer(); pfault_init(); notify_cpu_starting(smp_processor_id()); - ipi_call_lock(); set_cpu_online(smp_processor_id(), true); - ipi_call_unlock(); local_irq_enable(); /* cpu_idle will call schedule for us */ cpu_idle(); -- cgit v1.2.3 From 8efdfc3a4ed009c978dab6609d15fb958e7cff12 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Tue, 29 May 2012 15:16:00 +0800 Subject: tile: SMP: Remove call to ipi_call_lock()/ipi_call_unlock() ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock protects only the call_function data structure itself, but it's completely unrelated to cpu_online_mask. The mask to which the IPIs are sent is calculated before call_function.lock is taken in smp_call_function_many(), so the locking around set_cpu_online() is pointless and can be removed. [ tglx: Massaged changelog ] Signed-off-by: Yong Zhang Cc: ralf@linux-mips.org Cc: sshtylyov@mvista.com Cc: david.daney@cavium.com Cc: nikunj@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: axboe@kernel.dk Cc: peterz@infradead.org Cc: Chris Metcalf Link: http://lkml.kernel.org/r/1338275765-3217-6-git-send-email-yong.zhang0@gmail.com Acked-by: Srivatsa S. Bhat Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- arch/tile/kernel/smpboot.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 84873fbe8f27..e686c5ac90be 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -198,17 +198,7 @@ void __cpuinit online_secondary(void) notify_cpu_starting(smp_processor_id()); - /* - * We need to hold call_lock, so there is no inconsistency - * between the time smp_call_function() determines number of - * IPI recipients, and the time when the determination is made - * for which cpus receive the IPI. Holding this - * lock helps us to not include this cpu in a currently in progress - * smp_call_function(). - */ - ipi_call_lock(); set_cpu_online(smp_processor_id(), 1); - ipi_call_unlock(); __get_cpu_var(cpu_state) = CPU_ONLINE; /* Set up tile-specific state for this cpu. */ -- cgit v1.2.3 From 3b6f70fd7dd4e19fc674ec99e389bf0da5589525 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Tue, 29 May 2012 15:16:01 +0800 Subject: x86-smp-remove-call-to-ipi_call_lock-ipi_call_unlock ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock protects only the call_function data structure itself, but it's completely unrelated to cpu_online_mask. The mask to which the IPIs are sent is calculated before call_function.lock is taken in smp_call_function_many(), so the locking around set_cpu_online() is pointless and can be removed. [ tglx: Massaged changelog ] Signed-off-by: Yong Zhang Cc: ralf@linux-mips.org Cc: sshtylyov@mvista.com Cc: david.daney@cavium.com Cc: nikunj@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: axboe@kernel.dk Cc: peterz@infradead.org Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Cc: Ingo Molnar Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/1338275765-3217-7-git-send-email-yong.zhang0@gmail.com Acked-by: Srivatsa S. Bhat Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- arch/x86/kernel/smpboot.c | 9 --------- arch/x86/xen/smp.c | 2 -- 2 files changed, 11 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f56f96da77f5..b2fd28ff84b5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -255,22 +255,13 @@ notrace static void __cpuinit start_secondary(void *unused) check_tsc_sync_target(); /* - * We need to hold call_lock, so there is no inconsistency - * between the time smp_call_function() determines number of - * IPI recipients, and the time when the determination is made - * for which cpus receive the IPI. Holding this - * lock helps us to not include this cpu in a currently in progress - * smp_call_function(). - * * We need to hold vector_lock so there the set of online cpus * does not change while we are assigning vectors to cpus. Holding * this lock ensures we don't half assign or remove an irq from a cpu. */ - ipi_call_lock(); lock_vector_lock(); set_cpu_online(smp_processor_id(), true); unlock_vector_lock(); - ipi_call_unlock(); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; x86_platform.nmi_init(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index afb250d22a6b..f58dca7a6e52 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -80,9 +80,7 @@ static void __cpuinit cpu_bringup(void) notify_cpu_starting(cpu); - ipi_call_lock(); set_cpu_online(cpu, true); - ipi_call_unlock(); this_cpu_write(cpu_state, CPU_ONLINE); -- cgit v1.2.3 From 459165e25030c0023cb54f73c14261a3d2f4a244 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Tue, 29 May 2012 15:16:02 +0800 Subject: ia64: SMP: Remove call to ipi_call_lock_irq()/ipi_call_unlock_irq() ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock protects only the call_function data structure itself, but it's completely unrelated to cpu_online_mask. The mask to which the IPIs are sent is calculated before call_function.lock is taken in smp_call_function_many(), so the locking around set_cpu_online() is pointless and can be removed. [ tglx: Massaged changelog ] Signed-off-by: Yong Zhang Cc: ralf@linux-mips.org Cc: sshtylyov@mvista.com Cc: david.daney@cavium.com Cc: nikunj@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: axboe@kernel.dk Cc: peterz@infradead.org Cc: Tony Luck Cc: Fenghua Yu Cc: linux-ia64@vger.kernel.org Link: http://lkml.kernel.org/r/1338275765-3217-8-git-send-email-yong.zhang0@gmail.com Acked-by: Srivatsa S. Bhat Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- arch/ia64/kernel/smpboot.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 1113b8aba07f..963d2db53bfa 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -382,7 +382,6 @@ smp_callin (void) set_numa_node(cpu_to_node_map[cpuid]); set_numa_mem(local_memory_node(cpu_to_node_map[cpuid])); - ipi_call_lock_irq(); spin_lock(&vector_lock); /* Setup the per cpu irq handling data structures */ __setup_vector_irq(cpuid); @@ -390,7 +389,6 @@ smp_callin (void) set_cpu_online(cpuid, true); per_cpu(cpu_state, cpuid) = CPU_ONLINE; spin_unlock(&vector_lock); - ipi_call_unlock_irq(); smp_setup_percpu_timer(); -- cgit v1.2.3 From bc6833009583bd5b096ef7aa2bb006854a5a2dce Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Tue, 29 May 2012 16:27:33 +0800 Subject: SPARC: SMP: Remove call to ipi_call_lock_irq()/ipi_call_unlock_irq() ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock protects only the call_function data structure itself, but it's completely unrelated to cpu_online_mask. The mask to which the IPIs are sent is calculated before call_function.lock is taken in smp_call_function_many(), so the locking around set_cpu_online() is pointless and can be removed. Delay irq enable to after set_cpu_online(). [ tglx: Massaged changelog ] Signed-off-by: Yong Zhang Cc: ralf@linux-mips.org Cc: sshtylyov@mvista.com Cc: david.daney@cavium.com Cc: nikunj@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: axboe@kernel.dk Cc: peterz@infradead.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/20120529082732.GA4250@zhy Acked-by: "David S. Miller" Acked-by: Srivatsa S. Bhat Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- arch/sparc/kernel/smp_64.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index f591598d92f6..781bcb10b8bd 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -103,8 +103,6 @@ void __cpuinit smp_callin(void) if (cheetah_pcache_forced_on) cheetah_enable_pcache(); - local_irq_enable(); - callin_flag = 1; __asm__ __volatile__("membar #Sync\n\t" "flush %%g6" : : : "memory"); @@ -124,9 +122,8 @@ void __cpuinit smp_callin(void) while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) rmb(); - ipi_call_lock_irq(); set_cpu_online(cpuid, true); - ipi_call_unlock_irq(); + local_irq_enable(); /* idle thread is expected to have preempt disabled */ preempt_disable(); @@ -1308,9 +1305,7 @@ int __cpu_disable(void) mdelay(1); local_irq_disable(); - ipi_call_lock(); set_cpu_online(cpu, false); - ipi_call_unlock(); cpu_map_rebuild(); -- cgit v1.2.3 From 72ec61a9e8347375e37c05e23511173d8451c3d4 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Tue, 29 May 2012 15:16:04 +0800 Subject: POWERPC: Smp: remove call to ipi_call_lock()/ipi_call_unlock() ipi_call_lock/unlock() lock resp. unlock call_function.lock. This lock protects only the call_function data structure itself, but it's completely unrelated to cpu_online_mask. The mask to which the IPIs are sent is calculated before call_function.lock is taken in smp_call_function_many(), so the locking around set_cpu_online() is pointless and can be removed. [ tglx: Massaged changelog ] Signed-off-by: Yong Zhang Cc: ralf@linux-mips.org Cc: sshtylyov@mvista.com Cc: david.daney@cavium.com Cc: nikunj@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: axboe@kernel.dk Cc: peterz@infradead.org Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1338275765-3217-10-git-send-email-yong.zhang0@gmail.com Acked-by: Srivatsa S. Bhat Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- arch/powerpc/kernel/smp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e4cb34322de4..e1417c42155c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused) if (system_state == SYSTEM_RUNNING) vdso_data->processorCount++; #endif - ipi_call_lock(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); /* Update sibling maps */ @@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused) of_node_put(np); } of_node_put(l2_cache); - ipi_call_unlock(); local_irq_enable(); -- cgit v1.2.3 From 8feb8e896d77439146d2e2ab3d0ab55bb5baf5fc Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Tue, 29 May 2012 15:16:05 +0800 Subject: smp: Remove ipi_call_lock[_irq]()/ipi_call_unlock[_irq]() There is no user of those APIs anymore, just remove it. Signed-off-by: Yong Zhang Cc: ralf@linux-mips.org Cc: sshtylyov@mvista.com Cc: david.daney@cavium.com Cc: nikunj@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: axboe@kernel.dk Cc: Andrew Morton Link: http://lkml.kernel.org/r/1338275765-3217-11-git-send-email-yong.zhang0@gmail.com Acked-by: Srivatsa S. Bhat Acked-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- include/linux/smp.h | 4 ---- kernel/smp.c | 20 -------------------- 2 files changed, 24 deletions(-) diff --git a/include/linux/smp.h b/include/linux/smp.h index 717fb746c9a8..a34d4f15430d 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -90,10 +90,6 @@ void kick_all_cpus_sync(void); void __init call_function_init(void); void generic_smp_call_function_single_interrupt(void); void generic_smp_call_function_interrupt(void); -void ipi_call_lock(void); -void ipi_call_unlock(void); -void ipi_call_lock_irq(void); -void ipi_call_unlock_irq(void); #else static inline void call_function_init(void) { } #endif diff --git a/kernel/smp.c b/kernel/smp.c index d0ae5b24875e..29dd40a9f2f4 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -581,26 +581,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait) return 0; } EXPORT_SYMBOL(smp_call_function); - -void ipi_call_lock(void) -{ - raw_spin_lock(&call_function.lock); -} - -void ipi_call_unlock(void) -{ - raw_spin_unlock(&call_function.lock); -} - -void ipi_call_lock_irq(void) -{ - raw_spin_lock_irq(&call_function.lock); -} - -void ipi_call_unlock_irq(void) -{ - raw_spin_unlock_irq(&call_function.lock); -} #endif /* USE_GENERIC_SMP_HELPERS */ /* Setup configured maximum number of CPUs to activate */ -- cgit v1.2.3 From 43cc7e86f3200b094e2960b732623aeec00b482d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 May 2012 17:26:16 +0200 Subject: smp: Remove num_booting_cpus() No users. Signed-off-by: Thomas Gleixner Cc: Srivatsa S. Bhat Cc: Rusty Russell --- arch/m32r/include/asm/smp.h | 5 ----- arch/x86/include/asm/smp.h | 5 ----- include/linux/smp.h | 1 - 3 files changed, 11 deletions(-) diff --git a/arch/m32r/include/asm/smp.h b/arch/m32r/include/asm/smp.h index cf7829a61551..c689b828dfe2 100644 --- a/arch/m32r/include/asm/smp.h +++ b/arch/m32r/include/asm/smp.h @@ -79,11 +79,6 @@ static __inline__ int cpu_number_map(int cpu) return cpu; } -static __inline__ unsigned int num_booting_cpus(void) -{ - return cpumask_weight(&cpu_callout_map); -} - extern void smp_send_timer(void); extern unsigned long send_IPI_mask_phys(const cpumask_t*, int, int); diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f48394513c37..2ffa95dc2333 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -169,11 +169,6 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) -/* We don't mark CPUs online until __cpu_up(), so we need another measure */ -static inline int num_booting_cpus(void) -{ - return cpumask_weight(cpu_callout_mask); -} #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() static inline int wbinvd_on_all_cpus(void) diff --git a/include/linux/smp.h b/include/linux/smp.h index a34d4f15430d..dd6f06be3c9f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -177,7 +177,6 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info) } while (0) static inline void smp_send_reschedule(int cpu) { } -#define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) -- cgit v1.2.3 From 7db971b235480849aa5b9209b67b62e987b3181b Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Sun, 3 Jun 2012 01:11:34 +0300 Subject: x86/platform: Introduce APIC post-initialization callback Some subarchitectures (such as vSMP) need to slightly adjust the underlying APIC structure. Add an APIC post-initialization callback to 'struct x86_platform_ops' for this purpose and use it for adjusting the APIC structure on vSMP systems. Signed-off-by: Ido Yariv Acked-by: Shai Fultheim Link: http://lkml.kernel.org/r/1338675095-27260-1-git-send-email-ido@wizery.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/apic/probe_32.c | 3 +++ arch/x86/kernel/apic/probe_64.c | 11 ++--------- arch/x86/kernel/vsmp_64.c | 13 +++++++++++++ 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c090af10ac7d..c377d9ccb696 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -164,6 +164,7 @@ struct x86_cpuinit_ops { * @i8042_detect pre-detect if i8042 controller exists * @save_sched_clock_state: save state for sched_clock() on suspend * @restore_sched_clock_state: restore state for sched_clock() on resume + * @apic_post_init: adjust apic if neeeded */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); @@ -177,6 +178,7 @@ struct x86_platform_ops { int (*i8042_detect)(void); void (*save_sched_clock_state)(void); void (*restore_sched_clock_state)(void); + void (*apic_post_init)(void); }; struct pci_dev; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1b291da09e60..8616d5198e16 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -208,6 +208,9 @@ void __init default_setup_apic_routing(void) if (apic->setup_apic_routing) apic->setup_apic_routing(); + + if (x86_platform.apic_post_init) + x86_platform.apic_post_init(); } void __init generic_apic_probe(void) diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 3fe986698929..1793dba7a741 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -23,11 +23,6 @@ #include #include -static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ @@ -48,10 +43,8 @@ void __init default_setup_apic_routing(void) } } - if (is_vsmp_box()) { - /* need to update phys_pkg_id */ - apic->phys_pkg_id = apicid_phys_pkg_id; - } + if (x86_platform.apic_post_init) + x86_platform.apic_post_init(); } /* Same for both flat and physical. */ diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 8eeb55a551b4..59eea855f451 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -187,12 +187,25 @@ static void __init vsmp_cap_cpus(void) #endif } +static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + +static void vsmp_apic_post_init(void) +{ + /* need to update phys_pkg_id */ + apic->phys_pkg_id = apicid_phys_pkg_id; +} + void __init vsmp_init(void) { detect_vsmp_box(); if (!is_vsmp_box()) return; + x86_platform.apic_post_init = vsmp_apic_post_init; + vsmp_cap_cpus(); set_vsmp_pv_ops(); -- cgit v1.2.3 From c767a54ba0657e52e6edaa97cbe0b0a8bf1c1655 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 21 May 2012 19:50:07 -0700 Subject: x86/debug: Add KERN_ to bare printks, convert printks to pr_ Use a more current logging style: - Bare printks should have a KERN_ for consistency's sake - Add pr_fmt where appropriate - Neaten some macro definitions - Convert some Ok output to OK - Use "%s: ", __func__ in pr_fmt for summit - Convert some printks to pr_ Message output is not identical in all cases. Signed-off-by: Joe Perches Cc: levinsasha928@gmail.com Link: http://lkml.kernel.org/r/1337655007.24226.10.camel@joe2Laptop [ merged two similar patches, tidied up the changelog ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/floppy.h | 2 +- arch/x86/include/asm/pci_x86.h | 8 ++- arch/x86/include/asm/pgtable-2level.h | 4 +- arch/x86/include/asm/pgtable-3level.h | 6 +-- arch/x86/include/asm/pgtable_64.h | 8 +-- arch/x86/kernel/alternative.c | 17 +++--- arch/x86/kernel/amd_nb.c | 10 ++-- arch/x86/kernel/apic/io_apic.c | 38 ++++++------- arch/x86/kernel/apic/summit_32.c | 22 ++++---- arch/x86/kernel/apm_32.c | 29 +++++----- arch/x86/kernel/cpu/bugs.c | 20 +++---- arch/x86/kernel/cpu/mcheck/mce.c | 22 ++++---- arch/x86/kernel/cpu/perf_event_intel.c | 14 ++--- arch/x86/kernel/dumpstack.c | 4 +- arch/x86/kernel/dumpstack_32.c | 24 ++++----- arch/x86/kernel/dumpstack_64.c | 20 +++---- arch/x86/kernel/irq.c | 4 +- arch/x86/kernel/module.c | 32 ++++++----- arch/x86/kernel/pci-calgary_64.c | 34 ++++++------ arch/x86/kernel/process.c | 34 ++++++------ arch/x86/kernel/process_64.c | 8 +-- arch/x86/kernel/reboot.c | 14 +++-- arch/x86/kernel/signal.c | 5 +- arch/x86/kernel/smpboot.c | 97 ++++++++++++++++------------------ arch/x86/kernel/traps.c | 19 +++---- arch/x86/kernel/tsc.c | 50 +++++++++--------- arch/x86/kernel/vm86_32.c | 6 ++- arch/x86/kernel/vsyscall_64.c | 17 +++--- arch/x86/kernel/xsave.c | 12 +++-- 29 files changed, 301 insertions(+), 279 deletions(-) diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index dbe82a5c5eac..d3d74698dce9 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -99,7 +99,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) virtual_dma_residue += virtual_dma_count; virtual_dma_count = 0; #ifdef TRACE_FLPY_INT - printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", + printk(KERN_DEBUG "count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", virtual_dma_count, virtual_dma_residue, calls, bytes, dma_wait); calls = 0; diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b3a531746026..5ad24a89b19b 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -7,9 +7,13 @@ #undef DEBUG #ifdef DEBUG -#define DBG(x...) printk(x) +#define DBG(fmt, ...) printk(fmt, ##__VA_ARGS__) #else -#define DBG(x...) +#define DBG(fmt, ...) \ +do { \ + if (0) \ + printk(fmt, ##__VA_ARGS__); \ +} while (0) #endif #define PCI_PROBE_BIOS 0x0001 diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 98391db840c6..f2b489cf1602 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -2,9 +2,9 @@ #define _ASM_X86_PGTABLE_2LEVEL_H #define pte_ERROR(e) \ - printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) + pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e)) /* * Certain architectures need to do special things when PTEs diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 43876f16caf1..f824cfbaa9d4 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -9,13 +9,13 @@ */ #define pte_ERROR(e) \ - printk("%s:%d: bad pte %p(%08lx%08lx).\n", \ + pr_err("%s:%d: bad pte %p(%08lx%08lx)\n", \ __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %p(%016Lx).\n", \ + pr_err("%s:%d: bad pmd %p(%016Lx)\n", \ __FILE__, __LINE__, &(e), pmd_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %p(%016Lx).\n", \ + pr_err("%s:%d: bad pgd %p(%016Lx)\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) /* Rules for using set_pte: the pte being assigned *must* be diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 975f709e09ae..8251be02301e 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -26,16 +26,16 @@ extern pgd_t init_level4_pgt[]; extern void paging_init(void); #define pte_ERROR(e) \ - printk("%s:%d: bad pte %p(%016lx).\n", \ + pr_err("%s:%d: bad pte %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pte_val(e)) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %p(%016lx).\n", \ + pr_err("%s:%d: bad pmd %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pmd_val(e)) #define pud_ERROR(e) \ - printk("%s:%d: bad pud %p(%016lx).\n", \ + pr_err("%s:%d: bad pud %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pud_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %p(%016lx).\n", \ + pr_err("%s:%d: bad pgd %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) struct mm_struct; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1f84794f0759..1729d720299a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) "SMP alternatives: " fmt + #include #include #include @@ -63,8 +65,11 @@ static int __init setup_noreplace_paravirt(char *str) __setup("noreplace-paravirt", setup_noreplace_paravirt); #endif -#define DPRINTK(fmt, args...) if (debug_alternative) \ - printk(KERN_DEBUG fmt, args) +#define DPRINTK(fmt, ...) \ +do { \ + if (debug_alternative) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ +} while (0) /* * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes @@ -428,7 +433,7 @@ void alternatives_smp_switch(int smp) * If this still occurs then you should see a hang * or crash shortly after this line: */ - printk("lockdep: fixing up alternatives.\n"); + pr_info("lockdep: fixing up alternatives\n"); #endif if (noreplace_smp || smp_alt_once || skip_smp_alternatives) @@ -444,14 +449,14 @@ void alternatives_smp_switch(int smp) if (smp == smp_mode) { /* nothing */ } else if (smp) { - printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); + pr_info("switching to SMP code\n"); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); } else { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + pr_info("switching to UP code\n"); set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) @@ -546,7 +551,7 @@ void __init alternative_instructions(void) #ifdef CONFIG_SMP if (smp_alt_once) { if (1 == num_possible_cpus()) { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + pr_info("switching to UP code\n"); set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index be16854591cc..f29f6dd6bc08 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -2,6 +2,9 @@ * Shared support code for AMD K8 northbridges and derivates. * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -258,7 +261,7 @@ void amd_flush_garts(void) } spin_unlock_irqrestore(&gart_lock, flags); if (!flushed) - printk("nothing to flush?\n"); + pr_notice("nothing to flush?\n"); } EXPORT_SYMBOL_GPL(amd_flush_garts); @@ -269,11 +272,10 @@ static __init int init_amd_nbs(void) err = amd_cache_northbridges(); if (err < 0) - printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); + pr_notice("Cannot enumerate AMD northbridges\n"); if (amd_cache_gart() < 0) - printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, " - "GART support disabled.\n"); + pr_notice("Cannot initialize GART flush words, GART support disabled\n"); return err; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ac96561d1a99..5155d6f806f5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -448,8 +448,8 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi entry = alloc_irq_pin_list(node); if (!entry) { - printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", - node, apic, pin); + pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", + node, apic, pin); return -ENOMEM; } entry->apic = apic; @@ -661,7 +661,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ioapic_mask_entry(apic, pin); entry = ioapic_read_entry(apic, pin); if (entry.irr) - printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n", + pr_err("Unable to reset IRR for apic: %d, pin :%d\n", mpc_ioapic_id(apic), pin); } @@ -895,7 +895,7 @@ static int irq_polarity(int idx) } case 2: /* reserved */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); polarity = 1; break; } @@ -906,7 +906,7 @@ static int irq_polarity(int idx) } default: /* invalid */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); polarity = 1; break; } @@ -948,7 +948,7 @@ static int irq_trigger(int idx) } default: { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); trigger = 1; break; } @@ -962,7 +962,7 @@ static int irq_trigger(int idx) } case 2: /* reserved */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); trigger = 1; break; } @@ -973,7 +973,7 @@ static int irq_trigger(int idx) } default: /* invalid */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); trigger = 0; break; } @@ -991,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin) * Debugging check, we are in big trouble if this message pops up! */ if (mp_irqs[idx].dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); + pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; @@ -1521,7 +1521,6 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) reg_03.raw = io_apic_read(ioapic_idx, 3); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); @@ -1578,7 +1577,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) i, ir_entry->index ); - printk("%1d %1d %1d %1d %1d " + pr_cont("%1d %1d %1d %1d %1d " "%1d %1d %X %02X\n", ir_entry->format, ir_entry->mask, @@ -1598,7 +1597,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) i, entry.dest ); - printk("%1d %1d %1d %1d %1d " + pr_cont("%1d %1d %1d %1d %1d " "%1d %1d %02X\n", entry.mask, entry.trigger, @@ -1651,8 +1650,8 @@ __apicdebuginit(void) print_IO_APICs(void) continue; printk(KERN_DEBUG "IRQ%d ", irq); for_each_irq_pin(entry, cfg->irq_2_pin) - printk("-> %d:%d", entry->apic, entry->pin); - printk("\n"); + pr_cont("-> %d:%d", entry->apic, entry->pin); + pr_cont("\n"); } printk(KERN_INFO ".................................... done.\n"); @@ -1665,9 +1664,9 @@ __apicdebuginit(void) print_APIC_field(int base) printk(KERN_DEBUG); for (i = 0; i < 8; i++) - printk(KERN_CONT "%08x", apic_read(base + i*0x10)); + pr_cont("%08x", apic_read(base + i*0x10)); - printk(KERN_CONT "\n"); + pr_cont("\n"); } __apicdebuginit(void) print_local_APIC(void *dummy) @@ -1769,7 +1768,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); } } - printk("\n"); + pr_cont("\n"); } __apicdebuginit(void) print_local_APICs(int maxcpu) @@ -2065,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) reg_00.raw = io_apic_read(ioapic_idx, 0); raw_spin_unlock_irqrestore(&ioapic_lock, flags); if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) - printk("could not set ID!\n"); + pr_cont("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); } @@ -3563,7 +3562,8 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id) /* Sanity check */ if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + pr_err("IOAPIC[%d]: Unable to change apic_id!\n", + ioapic); return -1; } } diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 659897c00755..e97d542ccdd0 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -26,6 +26,8 @@ * */ +#define pr_fmt(fmt) "summit: %s: " fmt, __func__ + #include #include #include @@ -235,8 +237,8 @@ static int summit_apic_id_registered(void) static void summit_setup_apic_routing(void) { - printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", - nr_ioapics); + pr_info("Enabling APIC mode: Summit. Using %d I/O APICs\n", + nr_ioapics); } static int summit_cpu_present_to_apicid(int mps_cpu) @@ -275,7 +277,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk("%s: Not a valid mask!\n", __func__); + pr_err("Not a valid mask!\n"); return BAD_APICID; } apicid |= new_apicid; @@ -355,7 +357,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) } } if (i == rio_table_hdr->num_rio_dev) { - printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); + pr_err("Couldn't find owner Cyclone for Winnipeg!\n"); return last_bus; } @@ -366,7 +368,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) } } if (i == rio_table_hdr->num_scal_dev) { - printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); + pr_err("Couldn't find owner Twister for Cyclone!\n"); return last_bus; } @@ -396,7 +398,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) num_buses = 9; break; default: - printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); + pr_info("Unsupported Winnipeg type!\n"); return last_bus; } @@ -411,13 +413,15 @@ static int build_detail_arrays(void) int i, scal_detail_size, rio_detail_size; if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { - printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); + pr_warn("MAX_NUMNODES too low! Defined as %d, but system has %d nodes\n", + MAX_NUMNODES, rio_table_hdr->num_scal_dev); return 0; } switch (rio_table_hdr->version) { default: - printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); + pr_warn("Invalid Rio Grande Table Version: %d\n", + rio_table_hdr->version); return 0; case 2: scal_detail_size = 11; @@ -462,7 +466,7 @@ void setup_summit(void) offset = *((unsigned short *)(ptr + offset)); } if (!rio_table_hdr) { - printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); + pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n"); return; } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 07b0c0db466c..d65464e43503 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -201,6 +201,8 @@ * http://www.microsoft.com/whdc/archive/amp_12.mspx] */ +#define pr_fmt(fmt) "apm: " fmt + #include #include @@ -485,11 +487,11 @@ static void apm_error(char *str, int err) if (error_table[i].key == err) break; if (i < ERROR_COUNT) - printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); + pr_notice("%s: %s\n", str, error_table[i].msg); else if (err < 0) - printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); + pr_notice("%s: linux error code %i\n", str, err); else - printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", + pr_notice("%s: unknown error code %#2.2x\n", str, err); } @@ -1184,7 +1186,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender) static int notified; if (notified++ == 0) - printk(KERN_ERR "apm: an event queue overflowed\n"); + pr_err("an event queue overflowed\n"); if (++as->event_tail >= APM_MAX_EVENTS) as->event_tail = 0; } @@ -1447,7 +1449,7 @@ static void apm_mainloop(void) static int check_apm_user(struct apm_user *as, const char *func) { if (as == NULL || as->magic != APM_BIOS_MAGIC) { - printk(KERN_ERR "apm: %s passed bad filp\n", func); + pr_err("%s passed bad filp\n", func); return 1; } return 0; @@ -1586,7 +1588,7 @@ static int do_release(struct inode *inode, struct file *filp) as1 = as1->next) ; if (as1 == NULL) - printk(KERN_ERR "apm: filp not in user list\n"); + pr_err("filp not in user list\n"); else as1->next = as->next; } @@ -1600,11 +1602,9 @@ static int do_open(struct inode *inode, struct file *filp) struct apm_user *as; as = kmalloc(sizeof(*as), GFP_KERNEL); - if (as == NULL) { - printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", - sizeof(*as)); + if (as == NULL) return -ENOMEM; - } + as->magic = APM_BIOS_MAGIC; as->event_tail = as->event_head = 0; as->suspends_pending = as->standbys_pending = 0; @@ -2313,16 +2313,16 @@ static int __init apm_init(void) } if (apm_info.disabled) { - printk(KERN_NOTICE "apm: disabled on user request.\n"); + pr_notice("disabled on user request.\n"); return -ENODEV; } if ((num_online_cpus() > 1) && !power_off && !smp) { - printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); + pr_notice("disabled - APM is not SMP safe.\n"); apm_info.disabled = 1; return -ENODEV; } if (!acpi_disabled) { - printk(KERN_NOTICE "apm: overridden by ACPI.\n"); + pr_notice("overridden by ACPI.\n"); apm_info.disabled = 1; return -ENODEV; } @@ -2356,8 +2356,7 @@ static int __init apm_init(void) kapmd_task = kthread_create(apm, NULL, "kapmd"); if (IS_ERR(kapmd_task)) { - printk(KERN_ERR "apm: disabled - Unable to start kernel " - "thread.\n"); + pr_err("disabled - Unable to start kernel thread\n"); err = PTR_ERR(kapmd_task); kapmd_task = NULL; remove_proc_entry("apm", NULL); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 46674fbb62ba..c97bb7b5a9f8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -55,8 +55,8 @@ static void __init check_fpu(void) if (!boot_cpu_data.hard_math) { #ifndef CONFIG_MATH_EMULATION - printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); - printk(KERN_EMERG "Giving up.\n"); + pr_emerg("No coprocessor found and no math emulation present\n"); + pr_emerg("Giving up\n"); for (;;) ; #endif return; @@ -86,7 +86,7 @@ static void __init check_fpu(void) boot_cpu_data.fdiv_bug = fdiv_bug; if (boot_cpu_data.fdiv_bug) - printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); + pr_warn("Hmm, FPU with FDIV bug\n"); } static void __init check_hlt(void) @@ -94,16 +94,16 @@ static void __init check_hlt(void) if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) return; - printk(KERN_INFO "Checking 'hlt' instruction... "); + pr_info("Checking 'hlt' instruction... "); if (!boot_cpu_data.hlt_works_ok) { - printk("disabled\n"); + pr_cont("disabled\n"); return; } halt(); halt(); halt(); halt(); - printk(KERN_CONT "OK.\n"); + pr_cont("OK\n"); } /* @@ -116,7 +116,7 @@ static void __init check_popad(void) #ifndef CONFIG_X86_POPAD_OK int res, inp = (int) &res; - printk(KERN_INFO "Checking for popad bug... "); + pr_info("Checking for popad bug... "); __asm__ __volatile__( "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " : "=&a" (res) @@ -127,9 +127,9 @@ static void __init check_popad(void) * CPU hard. Too bad. */ if (res != 12345678) - printk(KERN_CONT "Buggy.\n"); + pr_cont("Buggy\n"); else - printk(KERN_CONT "OK.\n"); + pr_cont("OK\n"); #endif } @@ -161,7 +161,7 @@ void __init check_bugs(void) { identify_boot_cpu(); #ifndef CONFIG_SMP - printk(KERN_INFO "CPU: "); + pr_info("CPU: "); print_cpu_info(&boot_cpu_data); #endif check_config(); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0a687fd185e6..5623b4b5d511 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -7,6 +7,9 @@ * Copyright 2008 Intel Corporation * Author: Andi Kleen */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -210,7 +213,7 @@ static void drain_mcelog_buffer(void) cpu_relax(); if (!m->finished && retries >= 4) { - pr_err("MCE: skipping error being logged currently!\n"); + pr_err("skipping error being logged currently!\n"); break; } } @@ -1167,8 +1170,9 @@ int memory_failure(unsigned long pfn, int vector, int flags) { /* mce_severity() should not hand us an ACTION_REQUIRED error */ BUG_ON(flags & MF_ACTION_REQUIRED); - printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" - "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); + pr_err("Uncorrected memory error in page 0x%lx ignored\n" + "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", + pfn); return 0; } @@ -1358,11 +1362,10 @@ static int __cpuinit __mcheck_cpu_cap_init(void) b = cap & MCG_BANKCNT_MASK; if (!banks) - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); + pr_info("CPU supports %d MCE banks\n", b); if (b > MAX_NR_BANKS) { - printk(KERN_WARNING - "MCE: Using only %u machine check banks out of %u\n", + pr_warn("Using only %u machine check banks out of %u\n", MAX_NR_BANKS, b); b = MAX_NR_BANKS; } @@ -1419,7 +1422,7 @@ static void __mcheck_cpu_init_generic(void) static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { if (c->x86_vendor == X86_VENDOR_UNKNOWN) { - pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); + pr_info("unknown CPU type - not enabling MCE support\n"); return -EOPNOTSUPP; } @@ -1574,7 +1577,7 @@ static void __mcheck_cpu_init_timer(void) /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) { - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", + pr_err("CPU#%d: Unexpected int18 (Machine Check)\n", smp_processor_id()); } @@ -1893,8 +1896,7 @@ static int __init mcheck_enable(char *str) get_option(&str, &monarch_timeout); } } else { - printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", - str); + pr_info("mce argument %s ignored. Please use /sys\n", str); return 0; } return 1; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 166546ec6aef..9e3f5d6e3d20 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -5,6 +5,8 @@ * among events on a single PMU. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1000,7 +1002,7 @@ static void intel_pmu_reset(void) local_irq_save(flags); - printk("clearing PMU state on CPU#%d\n", smp_processor_id()); + pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); for (idx = 0; idx < x86_pmu.num_counters; idx++) { checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); @@ -1638,14 +1640,14 @@ static __init void intel_clovertown_quirk(void) * But taken together it might just make sense to not enable PEBS on * these chips. */ - printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); + pr_warn("PEBS disabled due to CPU errata\n"); x86_pmu.pebs = 0; x86_pmu.pebs_constraints = NULL; } static __init void intel_sandybridge_quirk(void) { - printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); + pr_warn("PEBS disabled due to CPU errata\n"); x86_pmu.pebs = 0; x86_pmu.pebs_constraints = NULL; } @@ -1667,8 +1669,8 @@ static __init void intel_arch_events_quirk(void) /* disable event that reported as not presend by cpuid */ for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; - printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", - intel_arch_events_map[bit].name); + pr_warn("CPUID marked event: \'%s\' unavailable\n", + intel_arch_events_map[bit].name); } } @@ -1687,7 +1689,7 @@ static __init void intel_nehalem_quirk(void) intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; ebx.split.no_branch_misses_retired = 0; x86_pmu.events_maskl = ebx.full; - printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); + pr_info("CPU erratum AAJ80 worked around\n"); } } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 571246d81edf..87d3b5d663cd 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -27,8 +27,8 @@ static int die_counter; void printk_address(unsigned long address, int reliable) { - printk(" [<%p>] %s%pB\n", (void *) address, - reliable ? "" : "? ", (void *) address); + pr_cont(" [<%p>] %s%pB\n", + (void *)address, reliable ? "" : "? ", (void *)address); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index e0b1d783daab..3a8aced11ae9 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -73,11 +73,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (kstack_end(stack)) break; if (i && ((i % STACKSLOTS_PER_LINE) == 0)) - printk(KERN_CONT "\n"); - printk(KERN_CONT " %08lx", *stack++); + pr_cont("\n"); + pr_cont(" %08lx", *stack++); touch_nmi_watchdog(); } - printk(KERN_CONT "\n"); + pr_cont("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -89,9 +89,9 @@ void show_regs(struct pt_regs *regs) print_modules(); __show_regs(regs, !user_mode_vm(regs)); - printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - current_thread_info(), current, task_thread_info(current)); + pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", + TASK_COMM_LEN, current->comm, task_pid_nr(current), + current_thread_info(), current, task_thread_info(current)); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. @@ -102,10 +102,10 @@ void show_regs(struct pt_regs *regs) unsigned char c; u8 *ip; - printk(KERN_EMERG "Stack:\n"); + pr_emerg("Stack:\n"); show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); - printk(KERN_EMERG "Code: "); + pr_emerg("Code:"); ip = (u8 *)regs->ip - code_prologue; if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { @@ -116,16 +116,16 @@ void show_regs(struct pt_regs *regs) for (i = 0; i < code_len; i++, ip++) { if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - printk(KERN_CONT " Bad EIP value."); + pr_cont(" Bad EIP value."); break; } if (ip == (u8 *)regs->ip) - printk(KERN_CONT "<%02x> ", c); + pr_cont(" <%02x>", c); else - printk(KERN_CONT "%02x ", c); + pr_cont(" %02x", c); } } - printk(KERN_CONT "\n"); + pr_cont("\n"); } int is_valid_bugaddr(unsigned long ip) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 791b76122aa8..c582e9c5bd1a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -228,20 +228,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (stack >= irq_stack && stack <= irq_stack_end) { if (stack == irq_stack_end) { stack = (unsigned long *) (irq_stack_end[-1]); - printk(KERN_CONT " "); + pr_cont(" "); } } else { if (((long) stack & (THREAD_SIZE-1)) == 0) break; } if (i && ((i % STACKSLOTS_PER_LINE) == 0)) - printk(KERN_CONT "\n"); - printk(KERN_CONT " %016lx", *stack++); + pr_cont("\n"); + pr_cont(" %016lx", *stack++); touch_nmi_watchdog(); } preempt_enable(); - printk(KERN_CONT "\n"); + pr_cont("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -256,8 +256,8 @@ void show_regs(struct pt_regs *regs) printk("CPU %d ", cpu); print_modules(); __show_regs(regs, 1); - printk("Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, task_thread_info(cur), cur); + printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n", + cur->comm, cur->pid, task_thread_info(cur), cur); /* * When in-kernel, we also print out the stack and code at the @@ -284,16 +284,16 @@ void show_regs(struct pt_regs *regs) for (i = 0; i < code_len; i++, ip++) { if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - printk(KERN_CONT " Bad RIP value."); + pr_cont(" Bad RIP value."); break; } if (ip == (u8 *)regs->ip) - printk(KERN_CONT "<%02x> ", c); + pr_cont("<%02x> ", c); else - printk(KERN_CONT "%02x ", c); + pr_cont("%02x ", c); } } - printk(KERN_CONT "\n"); + pr_cont("\n"); } int is_valid_bugaddr(unsigned long ip) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3dafc6003b7c..1f5f1d5d2a02 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -294,9 +294,9 @@ void fixup_irqs(void) raw_spin_unlock(&desc->lock); if (break_affinity && set_affinity) - printk("Broke affinity for irq %i\n", irq); + pr_notice("Broke affinity for irq %i\n", irq); else if (!set_affinity) - printk("Cannot set affinity for irq %i\n", irq); + pr_notice("Cannot set affinity for irq %i\n", irq); } /* diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index f21fd94ac897..202494d2ec6e 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -30,9 +33,14 @@ #include #if 0 -#define DEBUGP printk +#define DEBUGP(fmt, ...) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__) #else -#define DEBUGP(fmt...) +#define DEBUGP(fmt, ...) \ +do { \ + if (0) \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ +} while (0) #endif void *module_alloc(unsigned long size) @@ -56,8 +64,8 @@ int apply_relocate(Elf32_Shdr *sechdrs, Elf32_Sym *sym; uint32_t *location; - DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); + DEBUGP("Applying relocate section %u to %u\n", + relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -77,7 +85,7 @@ int apply_relocate(Elf32_Shdr *sechdrs, *location += sym->st_value - (uint32_t)location; break; default: - printk(KERN_ERR "module %s: Unknown relocation: %u\n", + pr_err("%s: Unknown relocation: %u\n", me->name, ELF32_R_TYPE(rel[i].r_info)); return -ENOEXEC; } @@ -97,8 +105,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, void *loc; u64 val; - DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); + DEBUGP("Applying relocate section %u to %u\n", + relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -110,8 +118,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, + ELF64_R_SYM(rel[i].r_info); DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), - sym->st_value, rel[i].r_addend, (u64)loc); + (int)ELF64_R_TYPE(rel[i].r_info), + sym->st_value, rel[i].r_addend, (u64)loc); val = sym->st_value + rel[i].r_addend; @@ -140,7 +148,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, #endif break; default: - printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", + pr_err("%s: Unknown rela relocation: %llu\n", me->name, ELF64_R_TYPE(rel[i].r_info)); return -ENOEXEC; } @@ -148,9 +156,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return 0; overflow: - printk(KERN_ERR "overflow in relocation type %d val %Lx\n", + pr_err("overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); - printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", + pr_err("`%s' likely not compiled with -mcmodel=kernel\n", me->name); return -ENOEXEC; } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index b72838bae64a..299d49302e7d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -22,6 +22,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) "Calgary: " fmt + #include #include #include @@ -245,7 +247,7 @@ static unsigned long iommu_range_alloc(struct device *dev, offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, npages, 0, boundary_size, 0); if (offset == ~0UL) { - printk(KERN_WARNING "Calgary: IOMMU full.\n"); + pr_warn("IOMMU full\n"); spin_unlock_irqrestore(&tbl->it_lock, flags); if (panic_on_overflow) panic("Calgary: fix the allocator.\n"); @@ -271,8 +273,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, entry = iommu_range_alloc(dev, tbl, npages); if (unlikely(entry == DMA_ERROR_CODE)) { - printk(KERN_WARNING "Calgary: failed to allocate %u pages in " - "iommu %p\n", npages, tbl); + pr_warn("failed to allocate %u pages in iommu %p\n", + npages, tbl); return DMA_ERROR_CODE; } @@ -561,8 +563,7 @@ static void calgary_tce_cache_blast(struct iommu_table *tbl) i++; } while ((val & 0xff) != 0xff && i < 100); if (i == 100) - printk(KERN_WARNING "Calgary: PCI bus not quiesced, " - "continuing anyway\n"); + pr_warn("PCI bus not quiesced, continuing anyway\n"); /* invalidate TCE cache */ target = calgary_reg(bbar, tar_offset(tbl->it_busno)); @@ -604,8 +605,7 @@ begin: i++; } while ((val64 & 0xff) != 0xff && i < 100); if (i == 100) - printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, " - "continuing anyway\n"); + pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n"); /* 3. poll Page Migration DEBUG for SoftStopFault */ target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); @@ -617,8 +617,7 @@ begin: if (++count < 100) goto begin; else { - printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, " - "aborting TCE cache flush sequence!\n"); + pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n"); return; /* pray for the best */ } } @@ -840,8 +839,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl) plssr = be32_to_cpu(readl(target)); /* If no error, the agent ID in the CSR is not valid */ - printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, " - "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr); + pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n", + tbl->it_busno, csr, plssr); } static void calioc2_dump_error_regs(struct iommu_table *tbl) @@ -867,22 +866,21 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl) target = calgary_reg(bbar, phboff | 0x800); mck = be32_to_cpu(readl(target)); - printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n", - tbl->it_busno); + pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno); - printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", - csr, plssr, csmr, mck); + pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", + csr, plssr, csmr, mck); /* dump rest of error regs */ - printk(KERN_EMERG "Calgary: "); + pr_emerg(""); for (i = 0; i < ARRAY_SIZE(errregs); i++) { /* err regs are at 0x810 - 0x870 */ erroff = (0x810 + (i * 0x10)); target = calgary_reg(bbar, phboff | erroff); errregs[i] = be32_to_cpu(readl(target)); - printk("0x%08x@0x%lx ", errregs[i], erroff); + pr_cont("0x%08x@0x%lx ", errregs[i], erroff); } - printk("\n"); + pr_cont("\n"); /* root complex status */ target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 735279e54e59..ef6a8456f719 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -145,16 +147,14 @@ void show_regs_common(void) /* Board Name is optional */ board = dmi_get_system_info(DMI_BOARD_NAME); - printk(KERN_CONT "\n"); - printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk(KERN_CONT " %s %s", vendor, product); - if (board) - printk(KERN_CONT "/%s", board); - printk(KERN_CONT "\n"); + printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version, + vendor, product, + board ? "/" : "", + board ? board : ""); } void flush_thread(void) @@ -645,7 +645,7 @@ static void amd_e400_idle(void) amd_e400_c1e_detected = true; if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halt in AMD C1E"); - printk(KERN_INFO "System has AMD C1E enabled\n"); + pr_info("System has AMD C1E enabled\n"); } } @@ -659,8 +659,7 @@ static void amd_e400_idle(void) */ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &cpu); - printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", - cpu); + pr_info("Switch to broadcast mode on CPU%d\n", cpu); } clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); @@ -681,8 +680,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," - " performance may degrade.\n"); + pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); } #endif if (pm_idle) @@ -692,11 +690,11 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) /* * One CPU supports mwait => All CPUs supports mwait */ - printk(KERN_INFO "using mwait in idle threads.\n"); + pr_info("using mwait in idle threads\n"); pm_idle = mwait_idle; } else if (cpu_has_amd_erratum(amd_erratum_400)) { /* E400: APIC timer interrupt does not wake up CPU from C1e */ - printk(KERN_INFO "using AMD E400 aware idle routine\n"); + pr_info("using AMD E400 aware idle routine\n"); pm_idle = amd_e400_idle; } else pm_idle = default_idle; @@ -715,7 +713,7 @@ static int __init idle_setup(char *str) return -EINVAL; if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); + pr_info("using polling idle threads\n"); pm_idle = poll_idle; boot_option_idle_override = IDLE_POLL; } else if (!strcmp(str, "mwait")) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 61cdf7fdf099..85151f3e36e2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -117,10 +117,10 @@ void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", - dead_task->comm, - dead_task->mm->context.ldt, - dead_task->mm->context.size); + pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n", + dead_task->comm, + dead_task->mm->context.ldt, + dead_task->mm->context.size); BUG(); } } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 79c45af81604..ab3f06260712 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -152,7 +154,8 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) { if (reboot_type != BOOT_BIOS) { reboot_type = BOOT_BIOS; - printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); + pr_info("%s series board detected. Selecting %s-method for reboots.\n", + "BIOS", d->ident); } return 0; } @@ -207,8 +210,8 @@ static int __init set_pci_reboot(const struct dmi_system_id *d) { if (reboot_type != BOOT_CF9) { reboot_type = BOOT_CF9; - printk(KERN_INFO "%s series board detected. " - "Selecting PCI-method for reboots.\n", d->ident); + pr_info("%s series board detected. Selecting %s-method for reboots.\n", + "PCI", d->ident); } return 0; } @@ -217,7 +220,8 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d) { if (reboot_type != BOOT_KBD) { reboot_type = BOOT_KBD; - printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident); + pr_info("%s series board detected. Selecting %s-method for reboot.\n", + "KBD", d->ident); } return 0; } @@ -668,7 +672,7 @@ static void __machine_emergency_restart(int emergency) static void native_machine_restart(char *__unused) { - printk("machine restart\n"); + pr_notice("machine restart\n"); if (!reboot_force) machine_shutdown(); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 21af737053aa..b280908a376e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -6,6 +6,9 @@ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-2002 x86-64 support by Andi Kleen */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -814,7 +817,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) me->comm, me->pid, where, frame, regs->ip, regs->sp, regs->orig_ax); print_vma_addr(" in ", regs->ip); - printk(KERN_CONT "\n"); + pr_cont("\n"); } force_sig(SIGSEGV, me); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fd019d78b1f4..456d64806c8f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1,4 +1,4 @@ -/* + /* * x86 SMP booting functions * * (c) 1995 Alan Cox, Building #3 @@ -39,6 +39,8 @@ * Glauber Costa : i386 and x86_64 integration */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -184,7 +186,7 @@ static void __cpuinit smp_callin(void) * boards) */ - pr_debug("CALLIN, before setup_local_APIC().\n"); + pr_debug("CALLIN, before setup_local_APIC()\n"); if (apic->smp_callin_clear_local_apic) apic->smp_callin_clear_local_apic(); setup_local_APIC(); @@ -420,17 +422,16 @@ static void impress_friends(void) /* * Allow the user to impress friends. */ - pr_debug("Before bogomips.\n"); + pr_debug("Before bogomips\n"); for_each_possible_cpu(cpu) if (cpumask_test_cpu(cpu, cpu_callout_mask)) bogosum += cpu_data(cpu).loops_per_jiffy; - printk(KERN_INFO - "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n", num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); - pr_debug("Before bogocount - setting activated=1.\n"); + pr_debug("Before bogocount - setting activated=1\n"); } void __inquire_remote_apic(int apicid) @@ -440,18 +441,17 @@ void __inquire_remote_apic(int apicid) int timeout; u32 status; - printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); + pr_info("Inquiring remote APIC 0x%x...\n", apicid); for (i = 0; i < ARRAY_SIZE(regs); i++) { - printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); + pr_info("... APIC 0x%x %s: ", apicid, names[i]); /* * Wait for idle. */ status = safe_apic_wait_icr_idle(); if (status) - printk(KERN_CONT - "a previous APIC delivery may have failed\n"); + pr_cont("a previous APIC delivery may have failed\n"); apic_icr_write(APIC_DM_REMRD | regs[i], apicid); @@ -464,10 +464,10 @@ void __inquire_remote_apic(int apicid) switch (status) { case APIC_ICR_RR_VALID: status = apic_read(APIC_RRR); - printk(KERN_CONT "%08x\n", status); + pr_cont("%08x\n", status); break; default: - printk(KERN_CONT "failed\n"); + pr_cont("failed\n"); } } } @@ -501,12 +501,12 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) apic_write(APIC_ESR, 0); accept_status = (apic_read(APIC_ESR) & 0xEF); } - pr_debug("NMI sent.\n"); + pr_debug("NMI sent\n"); if (send_status) - printk(KERN_ERR "APIC never delivered???\n"); + pr_err("APIC never delivered???\n"); if (accept_status) - printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); + pr_err("APIC delivery error (%lx)\n", accept_status); return (send_status | accept_status); } @@ -528,7 +528,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) apic_read(APIC_ESR); } - pr_debug("Asserting INIT.\n"); + pr_debug("Asserting INIT\n"); /* * Turn INIT on target chip @@ -544,7 +544,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) mdelay(10); - pr_debug("Deasserting INIT.\n"); + pr_debug("Deasserting INIT\n"); /* Target chip */ /* Send IPI */ @@ -577,14 +577,14 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) /* * Run STARTUP IPI loop. */ - pr_debug("#startup loops: %d.\n", num_starts); + pr_debug("#startup loops: %d\n", num_starts); for (j = 1; j <= num_starts; j++) { - pr_debug("Sending STARTUP #%d.\n", j); + pr_debug("Sending STARTUP #%d\n", j); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - pr_debug("After apic_write.\n"); + pr_debug("After apic_write\n"); /* * STARTUP IPI @@ -601,7 +601,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) */ udelay(300); - pr_debug("Startup point 1.\n"); + pr_debug("Startup point 1\n"); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -616,12 +616,12 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) if (send_status || accept_status) break; } - pr_debug("After Startup.\n"); + pr_debug("After Startup\n"); if (send_status) - printk(KERN_ERR "APIC never delivered???\n"); + pr_err("APIC never delivered???\n"); if (accept_status) - printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); + pr_err("APIC delivery error (%lx)\n", accept_status); return (send_status | accept_status); } @@ -635,11 +635,11 @@ static void __cpuinit announce_cpu(int cpu, int apicid) if (system_state == SYSTEM_BOOTING) { if (node != current_node) { if (current_node > (-1)) - pr_cont(" Ok.\n"); + pr_cont(" OK\n"); current_node = node; pr_info("Booting Node %3d, Processors ", node); } - pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); + pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : ""); return; } else pr_info("Booting Node %d Processor %d APIC 0x%x\n", @@ -719,9 +719,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) /* * allow APs to start initializing. */ - pr_debug("Before Callout %d.\n", cpu); + pr_debug("Before Callout %d\n", cpu); cpumask_set_cpu(cpu, cpu_callout_mask); - pr_debug("After Callout %d.\n", cpu); + pr_debug("After Callout %d\n", cpu); /* * Wait 5s total for a response @@ -749,7 +749,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) pr_err("CPU%d: Stuck ??\n", cpu); else /* trampoline code not run */ - pr_err("CPU%d: Not responding.\n", cpu); + pr_err("CPU%d: Not responding\n", cpu); if (apic->inquire_remote_apic) apic->inquire_remote_apic(apicid); } @@ -794,7 +794,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || !physid_isset(apicid, phys_cpu_present_map) || !apic->apic_id_valid(apicid)) { - printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); + pr_err("%s: bad cpu %d\n", __func__, cpu); return -EINVAL; } @@ -875,9 +875,8 @@ static int __init smp_sanity_check(unsigned max_cpus) unsigned int cpu; unsigned nr; - printk(KERN_WARNING - "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_BIGSMP.\n"); + pr_warn("More than 8 CPUs detected - skipping them\n" + "Use CONFIG_X86_BIGSMP\n"); nr = 0; for_each_present_cpu(cpu) { @@ -898,8 +897,7 @@ static int __init smp_sanity_check(unsigned max_cpus) #endif if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { - printk(KERN_WARNING - "weird, boot CPU (#%d) not listed by the BIOS.\n", + pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n", hard_smp_processor_id()); physid_set(hard_smp_processor_id(), phys_cpu_present_map); @@ -911,11 +909,10 @@ static int __init smp_sanity_check(unsigned max_cpus) */ if (!smp_found_config && !acpi_lapic) { preempt_enable(); - printk(KERN_NOTICE "SMP motherboard not detected.\n"); + pr_notice("SMP motherboard not detected\n"); disable_smp(); if (APIC_init_uniprocessor()) - printk(KERN_NOTICE "Local APIC not detected." - " Using dummy APIC emulation.\n"); + pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); return -1; } @@ -924,9 +921,8 @@ static int __init smp_sanity_check(unsigned max_cpus) * CPU too, but we do it for the sake of robustness anyway. */ if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk(KERN_NOTICE - "weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); + pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n", + boot_cpu_physical_apicid); physid_set(hard_smp_processor_id(), phys_cpu_present_map); } preempt_enable(); @@ -939,8 +935,7 @@ static int __init smp_sanity_check(unsigned max_cpus) if (!disable_apic) { pr_err("BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); - pr_err("... forcing use of dummy APIC emulation." - "(tell your hw vendor)\n"); + pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); } smpboot_clear_io_apic(); disable_ioapic_support(); @@ -953,7 +948,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * If SMP should be disabled, then really disable it! */ if (!max_cpus) { - printk(KERN_INFO "SMP mode deactivated.\n"); + pr_info("SMP mode deactivated\n"); smpboot_clear_io_apic(); connect_bsp_APIC(); @@ -1005,7 +1000,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) if (smp_sanity_check(max_cpus) < 0) { - printk(KERN_INFO "SMP disabled\n"); + pr_info("SMP disabled\n"); disable_smp(); goto out; } @@ -1043,7 +1038,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) * Set up local APIC timer on boot CPU. */ - printk(KERN_INFO "CPU%d: ", 0); + pr_info("CPU%d: ", 0); print_cpu_info(&cpu_data(0)); x86_init.timers.setup_percpu_clockev(); @@ -1093,7 +1088,7 @@ void __init native_smp_prepare_boot_cpu(void) void __init native_smp_cpus_done(unsigned int max_cpus) { - pr_debug("Boot done.\n"); + pr_debug("Boot done\n"); nmi_selftest(); impress_friends(); @@ -1154,8 +1149,7 @@ __init void prefill_possible_map(void) /* nr_cpu_ids could be reduced via nr_cpus= */ if (possible > nr_cpu_ids) { - printk(KERN_WARNING - "%d Processors exceeds NR_CPUS limit of %d\n", + pr_warn("%d Processors exceeds NR_CPUS limit of %d\n", possible, nr_cpu_ids); possible = nr_cpu_ids; } @@ -1164,13 +1158,12 @@ __init void prefill_possible_map(void) if (!setup_max_cpus) #endif if (possible > i) { - printk(KERN_WARNING - "%d Processors exceeds max_cpus limit of %u\n", + pr_warn("%d Processors exceeds max_cpus limit of %u\n", possible, setup_max_cpus); possible = i; } - printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", + pr_info("Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); for (i = 0; i < possible; i++) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 05b31d92f69c..b481341c9369 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -9,6 +9,9 @@ /* * Handle hardware traps and faults. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -143,12 +146,11 @@ trap_signal: #ifdef CONFIG_X86_64 if (show_unhandled_signals && unhandled_signal(tsk, signr) && printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] trap %s ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, str, - regs->ip, regs->sp, error_code); + pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", + tsk->comm, tsk->pid, str, + regs->ip, regs->sp, error_code); print_vma_addr(" in ", regs->ip); - printk("\n"); + pr_cont("\n"); } #endif @@ -269,12 +271,11 @@ do_general_protection(struct pt_regs *regs, long error_code) if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] general protection ip:%lx sp:%lx error:%lx", + pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx", tsk->comm, task_pid_nr(tsk), regs->ip, regs->sp, error_code); print_vma_addr(" in ", regs->ip); - printk("\n"); + pr_cont("\n"); } force_sig(SIGSEGV, tsk); @@ -570,7 +571,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) conditional_sti(regs); #if 0 /* No need to warn about this any longer. */ - printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); + pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); #endif } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index fc0a147e3727..cfa5d4f7ca56 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -84,8 +86,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable); #ifdef CONFIG_X86_TSC int __init notsc_setup(char *str) { - printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " - "cannot disable TSC completely.\n"); + pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n"); tsc_disabled = 1; return 1; } @@ -373,7 +374,7 @@ static unsigned long quick_pit_calibrate(void) goto success; } } - printk("Fast TSC calibration failed\n"); + pr_err("Fast TSC calibration failed\n"); return 0; success: @@ -392,7 +393,7 @@ success: */ delta *= PIT_TICK_RATE; do_div(delta, i*256*1000); - printk("Fast TSC calibration using PIT\n"); + pr_info("Fast TSC calibration using PIT\n"); return delta; } @@ -487,9 +488,8 @@ unsigned long native_calibrate_tsc(void) * use the reference value, as it is more precise. */ if (delta >= 90 && delta <= 110) { - printk(KERN_INFO - "TSC: PIT calibration matches %s. %d loops\n", - hpet ? "HPET" : "PMTIMER", i + 1); + pr_info("PIT calibration matches %s. %d loops\n", + hpet ? "HPET" : "PMTIMER", i + 1); return tsc_ref_min; } @@ -511,38 +511,36 @@ unsigned long native_calibrate_tsc(void) */ if (tsc_pit_min == ULONG_MAX) { /* PIT gave no useful value */ - printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); + pr_warn("Unable to calibrate against PIT\n"); /* We don't have an alternative source, disable TSC */ if (!hpet && !ref1 && !ref2) { - printk("TSC: No reference (HPET/PMTIMER) available\n"); + pr_notice("No reference (HPET/PMTIMER) available\n"); return 0; } /* The alternative source failed as well, disable TSC */ if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " - "failed.\n"); + pr_warn("HPET/PMTIMER calibration failed\n"); return 0; } /* Use the alternative source */ - printk(KERN_INFO "TSC: using %s reference calibration\n", - hpet ? "HPET" : "PMTIMER"); + pr_info("using %s reference calibration\n", + hpet ? "HPET" : "PMTIMER"); return tsc_ref_min; } /* We don't have an alternative source, use the PIT calibration value */ if (!hpet && !ref1 && !ref2) { - printk(KERN_INFO "TSC: Using PIT calibration value\n"); + pr_info("Using PIT calibration value\n"); return tsc_pit_min; } /* The alternative source failed, use the PIT calibration value */ if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " - "Using PIT calibration\n"); + pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n"); return tsc_pit_min; } @@ -551,9 +549,9 @@ unsigned long native_calibrate_tsc(void) * the PIT value as we know that there are PMTIMERs around * running at double speed. At least we let the user know: */ - printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", - hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); - printk(KERN_INFO "TSC: Using PIT calibration value\n"); + pr_warn("PIT calibration deviates from %s: %lu %lu\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); + pr_info("Using PIT calibration value\n"); return tsc_pit_min; } @@ -785,7 +783,7 @@ void mark_tsc_unstable(char *reason) tsc_unstable = 1; sched_clock_stable = 0; disable_sched_clock_irqtime(); - printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); + pr_info("Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) clocksource_mark_unstable(&clocksource_tsc); @@ -912,9 +910,9 @@ static void tsc_refine_calibration_work(struct work_struct *work) goto out; tsc_khz = freq; - printk(KERN_INFO "Refined TSC clocksource calibration: " - "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, - (unsigned long)tsc_khz % 1000); + pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n", + (unsigned long)tsc_khz / 1000, + (unsigned long)tsc_khz % 1000); out: clocksource_register_khz(&clocksource_tsc, tsc_khz); @@ -970,9 +968,9 @@ void __init tsc_init(void) return; } - printk("Detected %lu.%03lu MHz processor.\n", - (unsigned long)cpu_khz / 1000, - (unsigned long)cpu_khz % 1000); + pr_info("Detected %lu.%03lu MHz processor\n", + (unsigned long)cpu_khz / 1000, + (unsigned long)cpu_khz % 1000); /* * Secondary CPUs do not run through tsc_init(), so set up diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 255f58ae71e8..54abcc0baf23 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -28,6 +28,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -137,14 +139,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) local_irq_enable(); if (!current->thread.vm86_info) { - printk("no vm86_info: BAD\n"); + pr_alert("no vm86_info: BAD\n"); do_exit(SIGSEGV); } set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); if (tmp) { - printk("vm86: could not access userspace vm86_info\n"); + pr_alert("could not access userspace vm86_info\n"); do_exit(SIGSEGV); } diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 7515cf0e1805..acdc125ad44e 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -18,6 +18,8 @@ * use the vDSO. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -111,18 +113,13 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, const char *message) { - static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - struct task_struct *tsk; - - if (!show_unhandled_signals || !__ratelimit(&rs)) + if (!show_unhandled_signals) return; - tsk = current; - - printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", - level, tsk->comm, task_pid_nr(tsk), - message, regs->ip, regs->cs, - regs->sp, regs->ax, regs->si, regs->di); + pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", + level, current->comm, task_pid_nr(current), + message, regs->ip, regs->cs, + regs->sp, regs->ax, regs->si, regs->di); } static int addr_to_vsyscall_nr(unsigned long addr) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index bd18149b2b0f..3d3e20709119 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -3,6 +3,9 @@ * * Author: Suresh Siddha */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -162,7 +165,7 @@ int save_i387_xstate(void __user *buf) BUG_ON(sig_xstate_size < xstate_size); if ((unsigned long)buf % 64) - printk("save_i387_xstate: bad fpstate %p\n", buf); + pr_err("%s: bad fpstate %p\n", __func__, buf); if (!used_math()) return 0; @@ -422,7 +425,7 @@ static void __init xstate_enable_boot_cpu(void) pcntxt_mask = eax + ((u64)edx << 32); if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { - printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", + pr_err("FP/SSE not shown under xsave features 0x%llx\n", pcntxt_mask); BUG(); } @@ -445,9 +448,8 @@ static void __init xstate_enable_boot_cpu(void) setup_xstate_init(); - printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " - "cntxt size 0x%x\n", - pcntxt_mask, xstate_size); + pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", + pcntxt_mask, xstate_size); } /* -- cgit v1.2.3 From 332afa656e76458ee9cf0f0d123016a0658539e4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 21 May 2012 16:58:01 -0700 Subject: x86/irq: Update irq_cfg domain unless the new affinity is a subset of the current domain Until now, irq_cfg domain is mostly static. Either all CPU's (used by flat mode) or one CPU (first CPU in the irq afffinity mask) to which irq is being migrated (this is used by the rest of apic modes). Upcoming x2apic cluster mode optimization patch allows the irq to be sent to any CPU in the x2apic cluster (if supported by the HW). So irq_cfg domain changes on the fly (depending on which CPU in the x2apic cluster is online). Instead of checking for any intersection between the new irq affinity mask and the current irq_cfg domain, check if the new irq affinity mask is a subset of the current irq_cfg domain. Otherwise proceed with updating the irq_cfg domain aswell as assigning vector's on all the CPUs specified in the new mask. This also cleans up a workaround in updating irq_cfg domain for legacy irq's that are handled by the IO-APIC. Signed-off-by: Suresh Siddha Cc: yinghai@kernel.org Cc: gorcunov@openvz.org Cc: agordeev@redhat.com Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1337644682-19854-1-git-send-email-suresh.b.siddha@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ac96561d1a99..910a3118438b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1126,8 +1126,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) old_vector = cfg->vector; if (old_vector) { cpumask_and(tmp_mask, mask, cpu_online_mask); - cpumask_and(tmp_mask, cfg->domain, tmp_mask); - if (!cpumask_empty(tmp_mask)) { + if (cpumask_subset(tmp_mask, cfg->domain)) { free_cpumask_var(tmp_mask); return 0; } @@ -1141,6 +1140,11 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) apic->vector_allocation_domain(cpu, tmp_mask); + if (cpumask_subset(tmp_mask, cfg->domain)) { + free_cpumask_var(tmp_mask); + return 0; + } + vector = current_vector; offset = current_offset; next: @@ -1346,13 +1350,6 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, if (!IO_APIC_IRQ(irq)) return; - /* - * For legacy irqs, cfg->domain starts with cpu 0 for legacy - * controllers like 8259. Now that IO-APIC can handle this irq, update - * the cfg->domain. - */ - if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) - apic->vector_allocation_domain(0, cfg->domain); if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; -- cgit v1.2.3 From 0b8255e660a0c229ebfe8f9fde12a8d4d34c50e0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 21 May 2012 16:58:02 -0700 Subject: x86/x2apic/cluster: Use all the members of one cluster specified in the smp_affinity mask for the interrupt destination If the HW implements round-robin interrupt delivery, this enables multiple cpu's (which are part of the user specified interrupt smp_affinity mask and belong to the same x2apic cluster) to service the interrupt. Also if the platform supports Power Aware Interrupt Routing, then this enables the interrupt to be routed to an idle cpu or a busy cpu depending on the perf/power bias tunable. We are now grouping all the cpu's in a cluster to one vector domain. So that will limit the total number of interrupt sources handled by Linux. Previously we support "cpu-count * available-vectors-per-cpu" interrupt sources but this will now reduce to "cpu-count/16 * available-vectors-per-cpu". Signed-off-by: Suresh Siddha Cc: yinghai@kernel.org Cc: gorcunov@openvz.org Cc: agordeev@redhat.com Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1337644682-19854-2-git-send-email-suresh.b.siddha@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x2apic.h | 9 ------ arch/x86/kernel/apic/x2apic_cluster.c | 56 ++++++++++++++++++++++++----------- arch/x86/kernel/apic/x2apic_phys.c | 9 ++++++ 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h index 92e54abf89e0..7a5a832a99b6 100644 --- a/arch/x86/include/asm/x2apic.h +++ b/arch/x86/include/asm/x2apic.h @@ -28,15 +28,6 @@ static int x2apic_apic_id_registered(void) return 1; } -/* - * For now each logical cpu is in its own vector allocation domain. - */ -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) { diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ff35cff0e1a7..90d999c7f2ea 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -98,34 +98,47 @@ static void x2apic_send_IPI_all(int vector) static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. - */ int cpu = cpumask_first(cpumask); + u32 dest = 0; + int i; - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - else + if (cpu > nr_cpu_ids) return BAD_APICID; + + for_each_cpu_and(i, cpumask, per_cpu(cpus_in_cluster, cpu)) + dest |= per_cpu(x86_cpu_to_logical_apicid, i); + + return dest; } static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask) { - int cpu; + u32 dest = 0; + u16 cluster; + int i; - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; + for_each_cpu_and(i, cpumask, andmask) { + if (!cpumask_test_cpu(i, cpu_online_mask)) + continue; + dest = per_cpu(x86_cpu_to_logical_apicid, i); + cluster = x2apic_cluster(i); + break; } - return per_cpu(x86_cpu_to_logical_apicid, cpu); + if (!dest) + return BAD_APICID; + + for_each_cpu_and(i, cpumask, andmask) { + if (!cpumask_test_cpu(i, cpu_online_mask)) + continue; + if (cluster != x2apic_cluster(i)) + continue; + dest |= per_cpu(x86_cpu_to_logical_apicid, i); + } + + return dest; } static void init_x2apic_ldr(void) @@ -208,6 +221,15 @@ static int x2apic_cluster_probe(void) return 0; } +/* + * Each x2apic cluster is an allocation domain. + */ +static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + cpumask_clear(retmask); + cpumask_copy(retmask, per_cpu(cpus_in_cluster, cpu)); +} + static struct apic apic_x2apic_cluster = { .name = "cluster x2apic", @@ -225,7 +247,7 @@ static struct apic apic_x2apic_cluster = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = x2apic_vector_allocation_domain, + .vector_allocation_domain = cluster_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index c17e982db275..93b25706f177 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -120,6 +120,15 @@ static int x2apic_phys_probe(void) return apic == &apic_x2apic_phys; } +/* + * Each logical cpu is in its own vector allocation domain. + */ +static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); +} + static struct apic apic_x2apic_phys = { .name = "physical x2apic", -- cgit v1.2.3 From 49d0c7a0a425a89190b7c3b1445faba9eb227bec Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 5 Jun 2012 13:23:15 +0200 Subject: x86/apic: Trivial whitespace fixes Signed-off-by: Alexander Gordeev Cc: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120605112310.GA11443@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic_flat_64.c | 2 +- arch/x86/kernel/apic/io_apic.c | 4 ++-- arch/x86/kernel/apic/x2apic_cluster.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 0e881c46e8c8..de279b32ceb1 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -92,7 +92,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) } static void - flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) +flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { unsigned long mask = cpumask_bits(cpumask)[0]; int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 910a3118438b..74c569791e75 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1363,7 +1363,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, cfg->vector, irq, attr->trigger, attr->polarity, dest); if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { - pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", + pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); __clear_irq_vector(irq, cfg); @@ -1466,7 +1466,7 @@ void setup_IO_APIC_irq_extra(u32 gsi) * Set up the timer pin, possibly with the 8259A-master behind. */ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, - unsigned int pin, int vector) + unsigned int pin, int vector) { struct IO_APIC_route_entry entry; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 90d999c7f2ea..2919e45d30c3 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -81,7 +81,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) } static void - x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } -- cgit v1.2.3 From bf721d3a3bc7a731add45c8078b142b494ab413e Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 5 Jun 2012 13:23:29 +0200 Subject: x86/apic: Factor out default target_cpus() operation Signed-off-by: Alexander Gordeev Cc: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120605112324.GA11449@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 5 +++++ arch/x86/include/asm/x2apic.h | 9 --------- arch/x86/kernel/apic/apic_flat_64.c | 14 ++------------ arch/x86/kernel/apic/apic_numachip.c | 7 +------ arch/x86/kernel/apic/bigsmp_32.c | 11 +---------- arch/x86/kernel/apic/x2apic_cluster.c | 2 +- arch/x86/kernel/apic/x2apic_phys.c | 2 +- arch/x86/kernel/apic/x2apic_uv_x.c | 7 +------ 8 files changed, 12 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index eaff4790ed96..fc38195d6405 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -537,6 +537,11 @@ static inline const struct cpumask *default_target_cpus(void) #endif } +static inline const struct cpumask *online_target_cpus(void) +{ + return cpu_online_mask; +} + DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h index 7a5a832a99b6..f90f0a587c66 100644 --- a/arch/x86/include/asm/x2apic.h +++ b/arch/x86/include/asm/x2apic.h @@ -9,15 +9,6 @@ #include #include -/* - * Need to use more than cpu 0, because we need more vectors - * when MSI-X are used. - */ -static const struct cpumask *x2apic_target_cpus(void) -{ - return cpu_online_mask; -} - static int x2apic_apic_id_valid(int apicid) { return 1; diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index de279b32ceb1..61ac1afeff07 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -36,11 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 1; } -static const struct cpumask *flat_target_cpus(void) -{ - return cpu_online_mask; -} - static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus @@ -186,7 +181,7 @@ static struct apic apic_flat = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = flat_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, @@ -262,11 +257,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static const struct cpumask *physflat_target_cpus(void) -{ - return cpu_online_mask; -} - static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) { cpumask_clear(retmask); @@ -345,7 +335,7 @@ static struct apic apic_physflat = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = physflat_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 6ec6d5d297c3..3255a60fcc95 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -72,11 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) return initial_apic_id >> index_msb; } -static const struct cpumask *numachip_target_cpus(void) -{ - return cpu_online_mask; -} - static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask) { cpumask_clear(retmask); @@ -253,7 +248,7 @@ static struct apic apic_numachip __refconst = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = numachip_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 31fbdbfbf960..c288e81e00ff 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void) return 1; } -static const struct cpumask *bigsmp_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { return 0; @@ -205,7 +196,7 @@ static struct apic apic_bigsmp = { /* phys delivery to target CPU: */ .irq_dest_mode = 0, - .target_cpus = bigsmp_target_cpus, + .target_cpus = default_target_cpus, .disable_esr = 1, .dest_logical = 0, .check_apicid_used = bigsmp_check_apicid_used, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 2919e45d30c3..612622c47dfb 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -241,7 +241,7 @@ static struct apic apic_x2apic_cluster = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = x2apic_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 93b25706f177..b1a8b39e3c3f 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -140,7 +140,7 @@ static struct apic apic_x2apic_phys = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = x2apic_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c6d03f7a4401..16efb92bfea5 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -185,11 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); -static const struct cpumask *uv_target_cpus(void) -{ - return cpu_online_mask; -} - static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) { cpumask_clear(retmask); @@ -362,7 +357,7 @@ static struct apic __refdata apic_x2apic_uv_x = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = uv_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, -- cgit v1.2.3 From 6398268d2bc454735f11e08705e858f9fdf5c750 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 5 Jun 2012 13:23:44 +0200 Subject: x86/apic: Factor out default cpu_mask_to_apicid() operations Signed-off-by: Alexander Gordeev Cc: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120605112340.GA11454@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 13 +++++++++--- arch/x86/kernel/apic/apic.c | 28 +++++++++++++++++++++++++ arch/x86/kernel/apic/apic_flat_64.c | 40 ++++-------------------------------- arch/x86/kernel/apic/apic_noop.c | 4 ++-- arch/x86/kernel/apic/apic_numachip.c | 36 ++------------------------------ arch/x86/kernel/apic/bigsmp_32.c | 30 ++------------------------- arch/x86/kernel/apic/probe_32.c | 4 ++-- arch/x86/kernel/apic/x2apic_phys.c | 36 ++------------------------------ 8 files changed, 52 insertions(+), 139 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index fc38195d6405..bef571769e68 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -592,14 +592,14 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) #endif static inline unsigned int -default_cpu_mask_to_apicid(const struct cpumask *cpumask) +flat_cpu_mask_to_apicid(const struct cpumask *cpumask) { return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; } static inline unsigned int -default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { unsigned long mask1 = cpumask_bits(cpumask)[0]; unsigned long mask2 = cpumask_bits(andmask)[0]; @@ -608,6 +608,13 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } +extern unsigned int +default_cpu_mask_to_apicid(const struct cpumask *cpumask); + +extern unsigned int +default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask); + static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) { return physid_isset(apicid, *map); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 39a222e094af..96a2608252f1 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2123,6 +2123,34 @@ void default_init_apic_ldr(void) apic_write(APIC_LDR, val); } +unsigned int default_cpu_mask_to_apicid(const struct cpumask *cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = cpumask_first(cpumask); + if (likely((unsigned)cpu < nr_cpu_ids)) + return per_cpu(x86_cpu_to_apicid, cpu); + + return BAD_APICID; +} + +unsigned int +default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + for_each_cpu_and(cpu, cpumask, andmask) { + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + } + return per_cpu(x86_cpu_to_apicid, cpu); +} + /* * Power management */ diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 61ac1afeff07..55b97ce4fa19 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -205,8 +205,8 @@ static struct apic apic_flat = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = flat_send_IPI_mask, .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, @@ -284,38 +284,6 @@ static void physflat_send_IPI_all(int vector) physflat_send_IPI_mask(cpu_online_mask, vector); } -static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - else - return BAD_APICID; -} - -static unsigned int -physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - return per_cpu(x86_cpu_to_apicid, cpu); -} - static int physflat_probe(void) { if (apic == &apic_physflat || num_possible_cpus() > 8) @@ -360,8 +328,8 @@ static struct apic apic_physflat = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, - .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = physflat_send_IPI_mask, .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index a6e4c6e06c08..7c3dd4fe0686 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -159,8 +159,8 @@ struct apic apic_noop = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = noop_send_IPI_mask, .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 3255a60fcc95..dba4bf2ed566 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -152,38 +152,6 @@ static void numachip_send_IPI_self(int vector) __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } -static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - cpu = cpumask_first(cpumask); - if (likely((unsigned)cpu < nr_cpu_ids)) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; -} - -static unsigned int -numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - return per_cpu(x86_cpu_to_apicid, cpu); -} - static int __init numachip_probe(void) { return apic == &apic_numachip; @@ -272,8 +240,8 @@ static struct apic apic_numachip __refconst = { .set_apic_id = set_apic_id, .apic_id_mask = 0xffU << 24, - .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = numachip_send_IPI_mask, .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index c288e81e00ff..907aa3d112a6 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -96,32 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid) return 1; } -/* As we are using single CPU as destination, pick only one CPU here */ -static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - int cpu = cpumask_first(cpumask); - - if (cpu < nr_cpu_ids) - return cpu_physical_id(cpu); - return BAD_APICID; -} - -static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - return cpu_physical_id(cpu); - } - return BAD_APICID; -} - static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; @@ -220,8 +194,8 @@ static struct apic apic_bigsmp = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = bigsmp_send_IPI_mask, .send_IPI_mask_allbutself = NULL, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1b291da09e60..71b6ac48ab26 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -123,8 +123,8 @@ static struct apic apic_default = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = default_send_IPI_mask_logical, .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index b1a8b39e3c3f..f730269edef2 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -76,38 +76,6 @@ static void x2apic_send_IPI_all(int vector) __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first(cpumask); - - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - else - return BAD_APICID; -} - -static unsigned int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - - return per_cpu(x86_cpu_to_apicid, cpu); -} - static void init_x2apic_ldr(void) { } @@ -164,8 +132,8 @@ static struct apic apic_x2apic_phys = { .set_apic_id = x2apic_set_apic_id, .apic_id_mask = 0xFFFFFFFFu, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = x2apic_send_IPI_mask, .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, -- cgit v1.2.3 From fbd24153c48b8425b09c161a020483cd77da870e Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 30 May 2012 18:40:03 -0600 Subject: x86/early_printk: Replace obsolete simple_strtoul() usage with kstrtoint() Change early_serial_init() to call kstrtoul() instead of calling obsoleted simple_strtoul(). Signed-off-by: Shuah Khan Cc: Joe Perches Link: http://lkml.kernel.org/r/1338424803.3569.5.camel@lorien2 Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 9b9f18b49918..5e4771266f1a 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -119,7 +119,7 @@ static __init void early_serial_init(char *s) unsigned char c; unsigned divisor; unsigned baud = DEFAULT_BAUD; - char *e; + ssize_t ret; if (*s == ',') ++s; @@ -127,14 +127,14 @@ static __init void early_serial_init(char *s) if (*s) { unsigned port; if (!strncmp(s, "0x", 2)) { - early_serial_base = simple_strtoul(s, &e, 16); + ret = kstrtoint(s, 16, &early_serial_base); } else { static const int __initconst bases[] = { 0x3f8, 0x2f8 }; if (!strncmp(s, "ttyS", 4)) s += 4; - port = simple_strtoul(s, &e, 10); - if (port > 1 || s == e) + ret = kstrtouint(s, 10, &port); + if (ret || port > 1) port = 0; early_serial_base = bases[port]; } @@ -149,8 +149,8 @@ static __init void early_serial_init(char *s) outb(0x3, early_serial_base + MCR); /* DTR + RTS */ if (*s) { - baud = simple_strtoul(s, &e, 0); - if (baud == 0 || s == e) + ret = kstrtouint(s, 0, &baud); + if (ret || baud == 0) baud = DEFAULT_BAUD; } -- cgit v1.2.3 From c00b275043adc14d668f36266b890f0c53d46640 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 29 May 2012 21:27:44 +0200 Subject: uprobes: Optimize is_swbp_at_addr() for current->mm Change is_swbp_at_addr() to try to avoid the costly read_opcode() if mm == current->mm, __copy_from_user_inatomic() should succeed in the likely case. Currently this optimization is not important, but we are going to add more is_swbp_at_addr(current->mm) callers. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120529192744.GA8057@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 985be4d80fe8..d0f5ec0dcdea 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -333,10 +333,20 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) uprobe_opcode_t opcode; int result; + if (current->mm == mm) { + pagefault_disable(); + result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr, + sizeof(opcode)); + pagefault_enable(); + + if (likely(result == 0)) + goto out; + } + result = read_opcode(mm, vaddr, &opcode); if (result) return result; - +out: if (is_swbp_insn(&opcode)) return 1; -- cgit v1.2.3 From a3d7bb47937b3a40b9f0c75655e97b3bb6407cbe Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 29 May 2012 21:27:59 +0200 Subject: uprobes: Change read_opcode() to use FOLL_FORCE set_orig_insn()->read_opcode() should not fail if the probed task did mprotect() after uprobe_register(), change it to use FOLL_FORCE. Without FOLL_WRITE this doesn't have any "side" effect but allows to read the !VM_READ memory. There is another reason for this change, we are going to use is_swbp_at_addr() from handle_swbp() which can race with another thread doing mprotect(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120529192759.GB8057@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d0f5ec0dcdea..a0dbc87a2ec6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -312,7 +312,7 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_ void *vaddr_new; int ret; - ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL); + ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); if (ret <= 0) return ret; -- cgit v1.2.3 From 3a9ea0520f38def4a3915b91f82455b749f07d88 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 29 May 2012 21:28:57 +0200 Subject: uprobes: Introduce find_active_uprobe() helper No functional changes. Move the "find uprobe" code from handle_swbp() to the new helper, find_active_uprobe(). Note: with or without this change, the find-active-uprobe logic is not exactly right. We can race with another thread which unmaps the memory with the valid uprobe before we take mm->mmap_sem. We can't find this uprobe simply because find_vma() fails. In this case we wrongly assume that this trap was not caused by uprobe and send the erroneous SIGTRAP. See the next changes. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120529192857.GC8057@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a0dbc87a2ec6..eaf4d55fd424 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1489,38 +1489,47 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) return false; } -/* - * Run handler and ask thread to singlestep. - * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. - */ -static void handle_swbp(struct pt_regs *regs) +static struct uprobe *find_active_uprobe(unsigned long bp_vaddr) { + struct mm_struct *mm = current->mm; + struct uprobe *uprobe = NULL; struct vm_area_struct *vma; - struct uprobe_task *utask; - struct uprobe *uprobe; - struct mm_struct *mm; - unsigned long bp_vaddr; - uprobe = NULL; - bp_vaddr = uprobe_get_swbp_addr(regs); - mm = current->mm; down_read(&mm->mmap_sem); vma = find_vma(mm, bp_vaddr); - if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) { - struct inode *inode; - loff_t offset; + if (vma && vma->vm_start <= bp_vaddr) { + if (valid_vma(vma, false)) { + struct inode *inode; + loff_t offset; - inode = vma->vm_file->f_mapping->host; - offset = bp_vaddr - vma->vm_start; - offset += (vma->vm_pgoff << PAGE_SHIFT); - uprobe = find_uprobe(inode, offset); + inode = vma->vm_file->f_mapping->host; + offset = bp_vaddr - vma->vm_start; + offset += (vma->vm_pgoff << PAGE_SHIFT); + uprobe = find_uprobe(inode, offset); + } } srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id); current->uprobe_srcu_id = -1; up_read(&mm->mmap_sem); + return uprobe; +} + +/* + * Run handler and ask thread to singlestep. + * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. + */ +static void handle_swbp(struct pt_regs *regs) +{ + struct uprobe_task *utask; + struct uprobe *uprobe; + unsigned long bp_vaddr; + + bp_vaddr = uprobe_get_swbp_addr(regs); + uprobe = find_active_uprobe(bp_vaddr); + if (!uprobe) { /* No matching uprobe; signal SIGTRAP. */ send_sig(SIGTRAP, current, 0); -- cgit v1.2.3 From d790d34653ab20c74034902f5f0889bba807949a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 29 May 2012 21:29:14 +0200 Subject: uprobes: Teach find_active_uprobe() to provide the "is_swbp" info A separate patch to simplify the review, and for the documentation. The patch adds another "int *is_swbp" argument to find_active_uprobe(), so far its only caller doesn't use this info. With this patch find_active_uprobe() additionally does: - if find_vma() + ->vm_start check fails, *is_swbp = -EFAULT - otherwise, if valid_vma() + find_uprobe() fails, it holds the result of is_swbp_at_addr(), can be negative too. The latter is only possible if we raced with another thread which did munmap/etc after we hit this bp. IOW. If find_active_uprobe(&is_swbp) returns NULL, the caller can look at is_swbp to figure out whether the current insn is bp or not, or detect the race with another thread if it is negative. Note: I think that performance-wise this change is fine. This adds is_swbp_at_addr(), but only if we raced with uprobe_unregister() or if we hit the "normal" int3 but this mm has uprobes as well. And even in this case the slow read_opcode() path is very unlikely, this insn recently triggered do_int3(), __copy_from_user_inatomic() shouldn't fail in the likely case. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120529192914.GD8057@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index eaf4d55fd424..ee3df704e78a 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1489,7 +1489,7 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) return false; } -static struct uprobe *find_active_uprobe(unsigned long bp_vaddr) +static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) { struct mm_struct *mm = current->mm; struct uprobe *uprobe = NULL; @@ -1497,7 +1497,6 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr) down_read(&mm->mmap_sem); vma = find_vma(mm, bp_vaddr); - if (vma && vma->vm_start <= bp_vaddr) { if (valid_vma(vma, false)) { struct inode *inode; @@ -1508,6 +1507,11 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr) offset += (vma->vm_pgoff << PAGE_SHIFT); uprobe = find_uprobe(inode, offset); } + + if (!uprobe) + *is_swbp = is_swbp_at_addr(mm, bp_vaddr); + } else { + *is_swbp = -EFAULT; } srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id); @@ -1526,9 +1530,10 @@ static void handle_swbp(struct pt_regs *regs) struct uprobe_task *utask; struct uprobe *uprobe; unsigned long bp_vaddr; + int is_swbp; bp_vaddr = uprobe_get_swbp_addr(regs); - uprobe = find_active_uprobe(bp_vaddr); + uprobe = find_active_uprobe(bp_vaddr, &is_swbp); if (!uprobe) { /* No matching uprobe; signal SIGTRAP. */ -- cgit v1.2.3 From 77fc4af1b59d12ab3b1467adf0a5204806853123 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 29 May 2012 21:29:28 +0200 Subject: uprobes: Change register_for_each_vma() to take mm->mmap_sem for writing Change register_for_each_vma() to take mm->mmap_sem for writing. This is a bit unfortunate but hopefully not too bad, this is the slow path anyway. This is needed to ensure that find_active_uprobe() can not race with uprobe_register() which adds the new bp at the same bp_vaddr, after find_uprobe() fails and before is_swbp_at_addr_fast() checks the memory. IOW, this is needed to ensure that if find_active_uprobe() returns NULL but is_swbp == true, we can safely assume that it was the "normal" int3 and we should send SIGTRAP. There is another reason for this change. We are going to replace uprobes_state->count with MMF_ flags set by register/unregister and cleared by find_active_uprobe(), and set/clear shouldn't race with each other. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120529192928.GE8057@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ee3df704e78a..a2ed82b4808c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -853,12 +853,12 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) } mm = vi->mm; - down_read(&mm->mmap_sem); + down_write(&mm->mmap_sem); vma = find_vma(mm, (unsigned long)vi->vaddr); if (!vma || !valid_vma(vma, is_register)) { list_del(&vi->probe_list); kfree(vi); - up_read(&mm->mmap_sem); + up_write(&mm->mmap_sem); mmput(mm); continue; } @@ -867,7 +867,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) vaddr != vi->vaddr) { list_del(&vi->probe_list); kfree(vi); - up_read(&mm->mmap_sem); + up_write(&mm->mmap_sem); mmput(mm); continue; } @@ -877,7 +877,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) else remove_breakpoint(uprobe, mm, vi->vaddr); - up_read(&mm->mmap_sem); + up_write(&mm->mmap_sem); mmput(mm); if (is_register) { if (ret && ret == -EEXIST) -- cgit v1.2.3 From 56bb4cf6475d702d2fb00fc641aa6441097c0330 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 29 May 2012 21:29:47 +0200 Subject: uprobes: Teach handle_swbp() to rely on "is_swbp" rather than uprobes_srcu Currently handle_swbp() assumes that it can't race with unregister, so it roughly does: if (find_uprobe(vaddr)) process_uprobe(); else send_sig(SIGTRAP); This relies on the not-really-working uprobes_srcu code we are going to remove, see the next patch. With this patch we rely on the result of is_swbp_at_addr(bp_vaddr) if find_uprobe() fails. If is_swbp == 1, then we hit the normal int3, we should send SIGTRAP. If is_swbp == 0, we raced with uprobe_unregister(), we simply restart this insn again. The "difficult" case is is_swbp == -EFAULT, when we can't read this memory. In this case I think we should restart too, and this is more correct compared to the current code which sends SIGTRAP. Ignoring ENOMEM/etc from get_user_pages(), this can only happen if another thread unmaps this memory before find_active_uprobe() takes mmap_sem. It would be better to pretend it was unmapped before this insn was executed, restart, and get SIGSEGV. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120529192947.GF8057@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a2ed82b4808c..1f02e3bbfc1d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1530,14 +1530,26 @@ static void handle_swbp(struct pt_regs *regs) struct uprobe_task *utask; struct uprobe *uprobe; unsigned long bp_vaddr; - int is_swbp; + int uninitialized_var(is_swbp); bp_vaddr = uprobe_get_swbp_addr(regs); uprobe = find_active_uprobe(bp_vaddr, &is_swbp); if (!uprobe) { - /* No matching uprobe; signal SIGTRAP. */ - send_sig(SIGTRAP, current, 0); + if (is_swbp > 0) { + /* No matching uprobe; signal SIGTRAP. */ + send_sig(SIGTRAP, current, 0); + } else { + /* + * Either we raced with uprobe_unregister() or we can't + * access this memory. The latter is only possible if + * another thread plays with our ->mm. In both cases + * we can simply restart. If this vma was unmapped we + * can pretend this insn was not executed yet and get + * the (correct) SIGSEGV after restart. + */ + instruction_pointer_set(regs, bp_vaddr); + } return; } -- cgit v1.2.3 From 778b032d96909690c19d84f8d17c13be65ed6f8e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 29 May 2012 21:30:08 +0200 Subject: uprobes: Kill uprobes_srcu/uprobe_srcu_id Kill the no longer needed uprobes_srcu/uprobe_srcu_id code. It doesn't really work anyway. synchronize_srcu() can only synchronize with the code "inside" the srcu_read_lock/srcu_read_unlock section, while uprobe_pre_sstep_notifier() does srcu_read_lock() _after_ we already hit the breakpoint. I guess this probably works "in practice". synchronize_srcu() is slow and it implies synchronize_sched(), and the probed task enters the non- preemptible section at the start of exception handler. Still this is not right at least in theory, and task->uprobe_srcu_id blows task_struct. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120529193008.GG8057@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/events/uprobes.c | 22 +++------------------- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 6029d8c54476..6bd19655c1a7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1569,7 +1569,6 @@ struct task_struct { #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; - int uprobe_srcu_id; #endif }; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1f02e3bbfc1d..8c5e043cd309 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -38,7 +38,6 @@ #define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE -static struct srcu_struct uprobes_srcu; static struct rb_root uprobes_tree = RB_ROOT; static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ @@ -738,20 +737,14 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr) } /* - * There could be threads that have hit the breakpoint and are entering the - * notifier code and trying to acquire the uprobes_treelock. The thread - * calling delete_uprobe() that is removing the uprobe from the rb_tree can - * race with these threads and might acquire the uprobes_treelock compared - * to some of the breakpoint hit threads. In such a case, the breakpoint - * hit threads will not find the uprobe. The current unregistering thread - * waits till all other threads have hit a breakpoint, to acquire the - * uprobes_treelock before the uprobe is removed from the rbtree. + * There could be threads that have already hit the breakpoint. They + * will recheck the current insn and restart if find_uprobe() fails. + * See find_active_uprobe(). */ static void delete_uprobe(struct uprobe *uprobe) { unsigned long flags; - synchronize_srcu(&uprobes_srcu); spin_lock_irqsave(&uprobes_treelock, flags); rb_erase(&uprobe->rb_node, &uprobes_tree); spin_unlock_irqrestore(&uprobes_treelock, flags); @@ -1388,9 +1381,6 @@ void uprobe_free_utask(struct task_struct *t) { struct uprobe_task *utask = t->utask; - if (t->uprobe_srcu_id != -1) - srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id); - if (!utask) return; @@ -1408,7 +1398,6 @@ void uprobe_free_utask(struct task_struct *t) void uprobe_copy_process(struct task_struct *t) { t->utask = NULL; - t->uprobe_srcu_id = -1; } /* @@ -1513,9 +1502,6 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) } else { *is_swbp = -EFAULT; } - - srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id); - current->uprobe_srcu_id = -1; up_read(&mm->mmap_sem); return uprobe; @@ -1656,7 +1642,6 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs) utask->state = UTASK_BP_HIT; set_thread_flag(TIF_UPROBE); - current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu); return 1; } @@ -1691,7 +1676,6 @@ static int __init init_uprobes(void) mutex_init(&uprobes_mutex[i]); mutex_init(&uprobes_mmap_mutex[i]); } - init_srcu_struct(&uprobes_srcu); return register_die_notifier(&uprobe_exception_nb); } -- cgit v1.2.3 From 5a425294ee7d4ab5a374248e85838dfd450caf75 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 15:30:31 +0200 Subject: perf/x86: Fix Intel shared extra MSR allocation Zheng Yan reported that event group validation can wreck event state when Intel extra_reg allocation changes event state. Validation shouldn't change any persistent state. Cloning events in validate_{event,group}() isn't really pretty either, so add a few special cases to avoid modifying the event state. The code is restructured to minimize the special case impact. Reported-by: Zheng Yan Acked-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 1 + arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel.c | 92 +++++++++++++++++++++++----------- 3 files changed, 66 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e049d6da0183..cb608383e4f6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void) if (!cpuc->shared_regs) goto error; } + cpuc->is_fake = 1; return cpuc; error: free_fake_cpuc(cpuc); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6638aaf54493..83794d8e6af0 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -117,6 +117,7 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ unsigned int group_flag; + int is_fake; /* * Intel DebugStore bits diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 166546ec6aef..965baa2fa790 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event) return NULL; } -static bool intel_try_alt_er(struct perf_event *event, int orig_idx) +static int intel_alt_er(int idx) { if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) - return false; + return idx; - if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { - event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01bb; - event->hw.extra_reg.idx = EXTRA_REG_RSP_1; - event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; - } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { + if (idx == EXTRA_REG_RSP_0) + return EXTRA_REG_RSP_1; + + if (idx == EXTRA_REG_RSP_1) + return EXTRA_REG_RSP_0; + + return idx; +} + +static void intel_fixup_er(struct perf_event *event, int idx) +{ + event->hw.extra_reg.idx = idx; + + if (idx == EXTRA_REG_RSP_0) { event->hw.config &= ~INTEL_ARCH_EVENT_MASK; event->hw.config |= 0x01b7; - event->hw.extra_reg.idx = EXTRA_REG_RSP_0; event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; + } else if (idx == EXTRA_REG_RSP_1) { + event->hw.config &= ~INTEL_ARCH_EVENT_MASK; + event->hw.config |= 0x01bb; + event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; } - - if (event->hw.extra_reg.idx == orig_idx) - return false; - - return true; } /* @@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, struct event_constraint *c = &emptyconstraint; struct er_account *era; unsigned long flags; - int orig_idx = reg->idx; + int idx = reg->idx; - /* already allocated shared msr */ - if (reg->alloc) + /* + * reg->alloc can be set due to existing state, so for fake cpuc we + * need to ignore this, otherwise we might fail to allocate proper fake + * state for this extra reg constraint. Also see the comment below. + */ + if (reg->alloc && !cpuc->is_fake) return NULL; /* call x86_get_event_constraint() */ again: - era = &cpuc->shared_regs->regs[reg->idx]; + era = &cpuc->shared_regs->regs[idx]; /* * we use spin_lock_irqsave() to avoid lockdep issues when * passing a fake cpuc @@ -1173,6 +1183,29 @@ again: if (!atomic_read(&era->ref) || era->config == reg->config) { + /* + * If its a fake cpuc -- as per validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. + * + * Not doing the ER fixup will only result in era->reg being + * wrong, but since we won't actually try and program hardware + * this isn't a problem either. + */ + if (!cpuc->is_fake) { + if (idx != reg->idx) + intel_fixup_er(event, idx); + + /* + * x86_schedule_events() can call get_event_constraints() + * multiple times on events in the case of incremental + * scheduling(). reg->alloc ensures we only do the ER + * allocation once. + */ + reg->alloc = 1; + } + /* lock in msr value */ era->config = reg->config; era->reg = reg->reg; @@ -1180,17 +1213,17 @@ again: /* one more user */ atomic_inc(&era->ref); - /* no need to reallocate during incremental event scheduling */ - reg->alloc = 1; - /* * need to call x86_get_event_constraint() * to check if associated event has constraints */ c = NULL; - } else if (intel_try_alt_er(event, orig_idx)) { - raw_spin_unlock_irqrestore(&era->lock, flags); - goto again; + } else { + idx = intel_alt_er(idx); + if (idx != reg->idx) { + raw_spin_unlock_irqrestore(&era->lock, flags); + goto again; + } } raw_spin_unlock_irqrestore(&era->lock, flags); @@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, struct er_account *era; /* - * only put constraint if extra reg was actually - * allocated. Also takes care of event which do - * not use an extra shared reg + * Only put constraint if extra reg was actually allocated. Also takes + * care of event which do not use an extra shared reg. + * + * Also, if this is a fake cpuc we shouldn't touch any event state + * (reg->alloc) and we don't care about leaving inconsistent cpuc state + * either since it'll be thrown out. */ - if (!reg->alloc) + if (!reg->alloc || cpuc->is_fake) return; era = &cpuc->shared_regs->regs[reg->idx]; -- cgit v1.2.3 From 0780c927a02492f917a74f51f3c801c76a637c57 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 10:26:43 +0200 Subject: perf/x86: Implement cycles:p for SNB/IVB Now that there's finally a chip with working PEBS (IvyBridge), we can enable the hardware and implement cycles:p for SNB/IVB. Cc: Stephane Eranian Requested-and-tested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel.c | 50 ++++++++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 83794d8e6af0..7241e2fc3c17 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -365,6 +365,7 @@ struct x86_pmu { int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; + void (*pebs_aliases)(struct perf_event *event); /* * Intel LBR diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 965baa2fa790..2312c1ff1b19 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1336,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, intel_put_shared_regs_event_constraints(cpuc, event); } -static int intel_pmu_hw_config(struct perf_event *event) +static void intel_pebs_aliases_core2(struct perf_event *event) { - int ret = x86_pmu_hw_config(event); - - if (ret) - return ret; - - if (event->attr.precise_ip && - (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { /* * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P * (0x003c) so that we can use it with PEBS. @@ -1365,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event) */ u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); + event->hw.config = alt_config; + } +} + +static void intel_pebs_aliases_snb(struct perf_event *event) +{ + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + /* + * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P + * (0x003c) so that we can use it with PEBS. + * + * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't + * PEBS capable. However we can use UOPS_RETIRED.ALL + * (0x01c2), which is a PEBS capable event, to get the same + * count. + * + * UOPS_RETIRED.ALL counts the number of cycles that retires + * CNTMASK micro-ops. By setting CNTMASK to a value (16) + * larger than the maximum number of micro-ops that can be + * retired per cycle (4) and then inverting the condition, we + * count all cycles that retire 16 or less micro-ops, which + * is every cycle. + * + * Thereby we gain a PEBS capable cycle counter. + */ + u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); event->hw.config = alt_config; } +} + +static int intel_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + if (event->attr.precise_ip && x86_pmu.pebs_aliases) + x86_pmu.pebs_aliases(event); if (intel_pmu_needs_lbr_smpl(event)) { ret = intel_pmu_setup_lbr_filter(event); @@ -1643,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, + .pebs_aliases = intel_pebs_aliases_core2, .format_attrs = intel_arch3_formats_attr, @@ -1885,6 +1918,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; -- cgit v1.2.3 From 47a8863dbb11745446314ca126593789ab74d93a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 10:26:43 +0200 Subject: perf/x86: Enable/Add IvyBridge hardware support Implement rudimentary IVB perf support. The SDM states its identical to SNB with exception of the exact event tables, but a quick look suggests they're similar enough. Also mark SNB-EP as broken for now. Requested-and-tested-by: Linus Torvalds Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2312c1ff1b19..187c294bc658 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1909,8 +1909,9 @@ __init int intel_pmu_init(void) break; case 42: /* SandyBridge */ - x86_add_quirk(intel_sandybridge_quirk); case 45: /* SandyBridge, "Romely-EP" */ + x86_add_quirk(intel_sandybridge_quirk); + case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); -- cgit v1.2.3 From 212d95dfdb66e5c81879b08e4f7fbfc8498b1ab5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 10:26:43 +0200 Subject: perf/x86: Update SNB PEBS constraints Afaict there's no need to (incompletely) iterate the MEM_UOPS_RETIRED.* umask state. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 5a3edc27f6e5..35e2192df9f4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ - INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ - INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ - INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ -- cgit v1.2.3 From 1c2ac3fde3e35279958e7b0408e2dcf866465301 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 May 2012 15:25:34 +0200 Subject: perf/x86: Fix wrmsrl() debug wrapper Move the wrmslr() debug wrapper to the common header now that all the include games are gone. Also clean it up a bit to avoid multiple evaluation of the argument. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-l4gkfnivwv4yi5mqxjlovymx@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 11 ----------- arch/x86/kernel/cpu/perf_event.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c4706cf9c011..43c2017347e7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -35,17 +35,6 @@ #include "perf_event.h" -#if 0 -#undef wrmsrl -#define wrmsrl(msr, val) \ -do { \ - trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ - (unsigned long)(val)); \ - native_write_msr((msr), (u32)((u64)(val)), \ - (u32)((u64)(val) >> 32)); \ -} while (0) -#endif - struct x86_pmu x86_pmu __read_mostly; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 7241e2fc3c17..23b5710b1747 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -14,6 +14,18 @@ #include +#if 0 +#undef wrmsrl +#define wrmsrl(msr, val) \ +do { \ + unsigned int _msr = (msr); \ + u64 _val = (val); \ + trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr), \ + (unsigned long long)(_val)); \ + native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32)); \ +} while (0) +#endif + /* * | NHM/WSM | SNB | * register ------------------------------- -- cgit v1.2.3 From 1ff4d58a192aea7f245981e2579765f961f6eb9c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Jun 2012 17:56:50 -0700 Subject: x86: Add rdpmcl() Add a version of rdpmc() that directly reads into a u64 Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338944211-28275-4-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 2 ++ arch/x86/include/asm/paravirt.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 084ef95274cd..e489c1475be9 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -237,6 +237,8 @@ do { \ (high) = (u32)(_l >> 32); \ } while (0) +#define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) + #define rdtscp(low, high, aux) \ do { \ unsigned long long _val = native_read_tscp(&(aux)); \ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 6cbbabf52707..14ce05dfe04e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -252,6 +252,8 @@ do { \ high = _l >> 32; \ } while (0) +#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) + static inline unsigned long long paravirt_rdtscp(unsigned int *aux) { return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); -- cgit v1.2.3 From c48b60538c3ba05a7a2713c4791b25405525431b Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Thu, 1 Mar 2012 17:28:14 -0500 Subject: perf/x86: Use rdpmc() rather than rdmsr() when possible in the kernel The rdpmc instruction is faster than the equivelant rdmsr call, so use it when possible in the kernel. The perfctr kernel patches did this, after extensive testing showed rdpmc to always be faster (One can look in etc/costs in the perfctr-2.6 package to see a historical list of the overhead). I have done some tests on a 3.2 kernel, the kernel module I used was included in the first posting of this patch: rdmsr rdpmc Core2 T9900: 203.9 cycles 30.9 cycles AMD fam0fh: 56.2 cycles 9.8 cycles Atom 6/28/2: 129.7 cycles 50.6 cycles The speedup of using rdpmc is large. [ It's probably possible (and desirable) to do this without requiring a new field in the hw_perf_event structure, but the fixed events make this tricky. ] Signed-off-by: Vince Weaver Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1203011724030.26934@cl320.eecs.utk.edu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 4 +++- include/linux/perf_event.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 43c2017347e7..000a4746c7ce 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -75,7 +75,7 @@ u64 x86_perf_event_update(struct perf_event *event) */ again: prev_raw_count = local64_read(&hwc->prev_count); - rdmsrl(hwc->event_base, new_raw_count); + rdpmcl(hwc->event_base_rdpmc, new_raw_count); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -819,9 +819,11 @@ static inline void x86_assign_hw_event(struct perf_event *event, } else if (hwc->idx >= X86_PMC_IDX_FIXED) { hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED); + hwc->event_base_rdpmc = (hwc->idx - X86_PMC_IDX_FIXED) | 1<<30; } else { hwc->config_base = x86_pmu_config_addr(hwc->idx); hwc->event_base = x86_pmu_event_addr(hwc->idx); + hwc->event_base_rdpmc = x86_pmu_addr_offset(hwc->idx); } } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 45db49f64bb4..1ce887abcc5c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -677,6 +677,7 @@ struct hw_perf_event { u64 last_tag; unsigned long config_base; unsigned long event_base; + int event_base_rdpmc; int idx; int last_cpu; -- cgit v1.2.3 From 70ab7003dec58afeae7f5d681dfa309b3a259f03 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Jun 2012 17:56:48 -0700 Subject: perf/x86: Don't assume there can be only 4 PEBS events On Sandy Bridge in non HT mode there are 8 counters available. Since every counter can write a PEBS record assuming there are 4 max is incorrect. Use the reported counter number -- with an upper limit for a static array -- instead. Also I made the warning messages a bit more informational. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338944211-28275-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 3 ++- arch/x86/kernel/cpu/perf_event_intel.c | 2 ++ arch/x86/kernel/cpu/perf_event_intel_ds.c | 8 ++++---- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 23b5710b1747..3df3de9452a9 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -69,7 +69,7 @@ struct amd_nb { }; /* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 4 +#define MAX_PEBS_EVENTS 8 /* * A debug store configuration. @@ -378,6 +378,7 @@ struct x86_pmu { void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); + int max_pebs_events; /* * Intel LBR diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 187c294bc658..e23e71f25264 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1800,6 +1800,8 @@ __init int intel_pmu_init(void) x86_pmu.events_maskl = ebx.full; x86_pmu.events_mask_len = eax.split.mask_length; + x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); + /* * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events: diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 35e2192df9f4..026373edef7f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -620,7 +620,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) * Should not happen, we program the threshold at 1 and do not * set a reset value. */ - WARN_ON_ONCE(n > 1); + WARN_ONCE(n > 1, "bad leftover pebs %d\n", n); at += n - 1; __intel_pmu_pebs_event(event, iregs, at); @@ -651,10 +651,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) * Should not happen, we program the threshold at 1 and do not * set a reset value. */ - WARN_ON_ONCE(n > MAX_PEBS_EVENTS); + WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n); for ( ; at < top; at++) { - for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { + for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) { event = cpuc->events[bit]; if (!test_bit(bit, cpuc->active_mask)) continue; @@ -670,7 +670,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) break; } - if (!event || bit >= MAX_PEBS_EVENTS) + if (!event || bit >= x86_pmu.max_pebs_events) continue; __intel_pmu_pebs_event(event, iregs, at); -- cgit v1.2.3 From 7fbb98c5cb07563d3ee08714073a8e5452a96be2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 7 Jun 2012 10:21:21 -0400 Subject: x86: Save cr2 in NMI in case NMIs take a page fault Avi Kivity reported that page faults in NMIs could cause havic if the NMI preempted another page fault handler: The recent changes to NMI allow exceptions to take place in NMI handlers, but I think that a #PF (say, due to access to vmalloc space) is still problematic. Consider the sequence #PF (cr2 set by processor) NMI ... #PF (cr2 clobbered) do_page_fault() IRET ... IRET do_page_fault() address = read_cr2() The last line reads the overwritten cr2 value. Originally I wrote a patch to solve this by saving the cr2 on the stack. Brian Gerst suggested to save it in the r12 register as both r12 and rbx are saved by the do_nmi handler as required by the C standard. But rbx is already used for saving if swapgs needs to be run on exit of the NMI handler. Link: http://lkml.kernel.org/r/4FBB8C40.6080304@redhat.com Link: http://lkml.kernel.org/r/1337763411.13348.140.camel@gandalf.stny.rr.com Reported-by: Avi Kivity Cc: Linus Torvalds Cc: H. Peter Anvin Cc: Thomas Gleixner Suggested-by: Brian Gerst Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_64.S | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7d65133b51be..111f6bbd8b38 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1758,10 +1758,30 @@ end_repeat_nmi: */ call save_paranoid DEFAULT_FRAME 0 + + /* + * Save off the CR2 register. If we take a page fault in the NMI then + * it could corrupt the CR2 value. If the NMI preempts a page fault + * handler before it was able to read the CR2 register, and then the + * NMI itself takes a page fault, the page fault that was preempted + * will read the information from the NMI page fault and not the + * origin fault. Save it off and restore it if it changes. + * Use the r12 callee-saved register. + */ + movq %cr2, %r12 + /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp,%rdi movq $-1,%rsi call do_nmi + + /* Did the NMI take a page fault? Restore cr2 if it did */ + movq %cr2, %rcx + cmpq %rcx, %r12 + je 1f + movq %r12, %cr2 +1: + testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: -- cgit v1.2.3 From 1f975f78c84c852e09463a2dfa57e3174e5c719e Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 1 Jun 2012 16:52:35 +0200 Subject: x86, pvops: Remove hooks for {rd,wr}msr_safe_regs There were paravirt_ops hooks for the full register set variant of {rd,wr}msr_safe which are actually not used by anyone anymore. Remove them to make the code cleaner and avoid silent breakages when the pvops members were uninitialized. This has been boot-tested natively and under Xen with PVOPS enabled and disabled on one machine. Signed-off-by: Andre Przywara Link: http://lkml.kernel.org/r/1338562358-28182-2-git-send-email-bp@amd64.org Acked-by: Konrad Rzeszutek Wilk Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr.h | 67 +++++++++++++++-------------------- arch/x86/include/asm/paravirt.h | 39 -------------------- arch/x86/include/asm/paravirt_types.h | 2 -- arch/x86/kernel/paravirt.c | 2 -- arch/x86/lib/msr-reg-export.c | 4 +-- arch/x86/lib/msr-reg.S | 10 +++--- arch/x86/xen/enlighten.c | 2 -- 7 files changed, 35 insertions(+), 91 deletions(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 084ef95274cd..81860cc012d1 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -115,8 +115,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr, extern unsigned long long native_read_tsc(void); -extern int native_rdmsr_safe_regs(u32 regs[8]); -extern int native_wrmsr_safe_regs(u32 regs[8]); +extern int rdmsr_safe_regs(u32 regs[8]); +extern int wrmsr_safe_regs(u32 regs[8]); static __always_inline unsigned long long __native_read_tsc(void) { @@ -187,43 +187,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) return err; } -static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) -{ - u32 gprs[8] = { 0 }; - int err; - - gprs[1] = msr; - gprs[7] = 0x9c5a203a; - - err = native_rdmsr_safe_regs(gprs); - - *p = gprs[0] | ((u64)gprs[2] << 32); - - return err; -} - -static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) -{ - u32 gprs[8] = { 0 }; - - gprs[0] = (u32)val; - gprs[1] = msr; - gprs[2] = val >> 32; - gprs[7] = 0x9c5a203a; - - return native_wrmsr_safe_regs(gprs); -} - -static inline int rdmsr_safe_regs(u32 regs[8]) -{ - return native_rdmsr_safe_regs(regs); -} - -static inline int wrmsr_safe_regs(u32 regs[8]) -{ - return native_wrmsr_safe_regs(regs); -} - #define rdtscl(low) \ ((low) = (u32)__native_read_tsc()) @@ -248,6 +211,32 @@ do { \ #endif /* !CONFIG_PARAVIRT */ +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +{ + u32 gprs[8] = { 0 }; + int err; + + gprs[1] = msr; + gprs[7] = 0x9c5a203a; + + err = rdmsr_safe_regs(gprs); + + *p = gprs[0] | ((u64)gprs[2] << 32); + + return err; +} + +static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) +{ + u32 gprs[8] = { 0 }; + + gprs[0] = (u32)val; + gprs[1] = msr; + gprs[2] = val >> 32; + gprs[7] = 0x9c5a203a; + + return wrmsr_safe_regs(gprs); +} #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ (u32)((val) >> 32)) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 6cbbabf52707..ebb0cdb60a89 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -128,21 +128,11 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err) return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); } -static inline int paravirt_rdmsr_regs(u32 *regs) -{ - return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs); -} - static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); } -static inline int paravirt_wrmsr_regs(u32 *regs) -{ - return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs); -} - /* These should all do BUG_ON(_err), but our headers are too tangled. */ #define rdmsr(msr, val1, val2) \ do { \ @@ -176,9 +166,6 @@ do { \ _err; \ }) -#define rdmsr_safe_regs(regs) paravirt_rdmsr_regs(regs) -#define wrmsr_safe_regs(regs) paravirt_wrmsr_regs(regs) - static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) { int err; @@ -186,32 +173,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) *p = paravirt_read_msr(msr, &err); return err; } -static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) -{ - u32 gprs[8] = { 0 }; - int err; - - gprs[1] = msr; - gprs[7] = 0x9c5a203a; - - err = paravirt_rdmsr_regs(gprs); - - *p = gprs[0] | ((u64)gprs[2] << 32); - - return err; -} - -static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) -{ - u32 gprs[8] = { 0 }; - - gprs[0] = (u32)val; - gprs[1] = msr; - gprs[2] = val >> 32; - gprs[7] = 0x9c5a203a; - - return paravirt_wrmsr_regs(gprs); -} static inline u64 paravirt_read_tsc(void) { diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 8e8b9a4987ee..8613cbb7ba41 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -153,9 +153,7 @@ struct pv_cpu_ops { /* MSR, PMC and TSR operations. err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ u64 (*read_msr)(unsigned int msr, int *err); - int (*rdmsr_regs)(u32 *regs); int (*write_msr)(unsigned int msr, unsigned low, unsigned high); - int (*wrmsr_regs)(u32 *regs); u64 (*read_tsc)(void); u64 (*read_pmc)(int counter); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 9ce885996fd7..17fff18a1031 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -352,9 +352,7 @@ struct pv_cpu_ops pv_cpu_ops = { #endif .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, - .rdmsr_regs = native_rdmsr_safe_regs, .write_msr = native_write_msr_safe, - .wrmsr_regs = native_wrmsr_safe_regs, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, .read_tscp = native_read_tscp, diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c index a311cc59b65d..8d6ef78b5d01 100644 --- a/arch/x86/lib/msr-reg-export.c +++ b/arch/x86/lib/msr-reg-export.c @@ -1,5 +1,5 @@ #include #include -EXPORT_SYMBOL(native_rdmsr_safe_regs); -EXPORT_SYMBOL(native_wrmsr_safe_regs); +EXPORT_SYMBOL(rdmsr_safe_regs); +EXPORT_SYMBOL(wrmsr_safe_regs); diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 69fa10623f21..f6d13eefad10 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -6,13 +6,13 @@ #ifdef CONFIG_X86_64 /* - * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]); + * int {rdmsr,wrmsr}_safe_regs(u32 gprs[8]); * * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] * */ .macro op_safe_regs op -ENTRY(native_\op\()_safe_regs) +ENTRY(\op\()_safe_regs) CFI_STARTPROC pushq_cfi %rbx pushq_cfi %rbp @@ -45,13 +45,13 @@ ENTRY(native_\op\()_safe_regs) _ASM_EXTABLE(1b, 3b) CFI_ENDPROC -ENDPROC(native_\op\()_safe_regs) +ENDPROC(\op\()_safe_regs) .endm #else /* X86_32 */ .macro op_safe_regs op -ENTRY(native_\op\()_safe_regs) +ENTRY(\op\()_safe_regs) CFI_STARTPROC pushl_cfi %ebx pushl_cfi %ebp @@ -92,7 +92,7 @@ ENTRY(native_\op\()_safe_regs) _ASM_EXTABLE(1b, 3b) CFI_ENDPROC -ENDPROC(native_\op\()_safe_regs) +ENDPROC(\op\()_safe_regs) .endm #endif diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e74df9548a02..60f1131eb94f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1116,9 +1116,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, - .rdmsr_regs = native_rdmsr_safe_regs, .write_msr = xen_write_msr_safe, - .wrmsr_regs = native_wrmsr_safe_regs, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, -- cgit v1.2.3 From ecd431d95aa04257e977fd6878e4203ce462e7e9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 1 Jun 2012 16:52:36 +0200 Subject: x86, cpu: Fix show_msr MSR accessing function There's no real reason why, when showing the MSRs on a CPU at boottime, we should be using the AMD-specific variant. Simply use the generic safe one which handles #GPs just fine. Cc: Yinghai Lu Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1338562358-28182-3-git-send-email-bp@amd64.org Acked-by: Konrad Rzeszutek Wilk Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6b9333b429ba..5bbc082c47ad 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -947,7 +947,7 @@ static void __cpuinit __print_cpu_msr(void) index_max = msr_range_array[i].max; for (index = index_min; index < index_max; index++) { - if (rdmsrl_amd_safe(index, &val)) + if (rdmsrl_safe(index, &val)) continue; printk(KERN_INFO " MSR%08x: %016llx\n", index, val); } -- cgit v1.2.3 From 169e9cbd77db23fe50bc8ba68bf081adb67b4220 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 1 Jun 2012 16:52:37 +0200 Subject: x86, cpu, amd: Fix crash as Xen Dom0 on AMD Trinity systems f7f286a910221 ("x86/amd: Re-enable CPU topology extensions in case BIOS has disabled it") wrongfully added code which used the AMD-specific {rd,wr}msr variants for no real reason. This caused boot panics on xen which wasn't initializing the {rd,wr}msr_safe_regs pv_ops members properly. This, in turn, caused a heated discussion leading to us reviewing all uses of the AMD-specific variants and removing them where unneeded (almost everywhere except an obscure K8 BIOS fix, see 6b0f43ddfa358). Finally, this patch switches to the standard {rd,wr}msr*_safe* variants which should've been used in the first place anyway and avoided unneeded excitation with xen. Signed-off-by: Andre Przywara Link: http://lkml.kernel.org/r/1338562358-28182-4-git-send-email-bp@amd64.org Cc: Andreas Herrmann Link: [Boris: correct and expand commit message] Signed-off-by: Borislav Petkov Acked-by: Konrad Rzeszutek Wilk Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 146bb6218eec..80ccd99542e6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -586,9 +586,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) !cpu_has(c, X86_FEATURE_TOPOEXT)) { u64 val; - if (!rdmsrl_amd_safe(0xc0011005, &val)) { + if (!rdmsrl_safe(0xc0011005, &val)) { val |= 1ULL << 54; - wrmsrl_amd_safe(0xc0011005, val); + checking_wrmsrl(0xc0011005, val); rdmsrl(0xc0011005, val); if (val & (1ULL << 54)) { set_cpu_cap(c, X86_FEATURE_TOPOEXT); -- cgit v1.2.3 From 2c929ce6f1ed1302be225512b433e6a6554f71a4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 1 Jun 2012 16:52:38 +0200 Subject: x86, cpu, amd: Deprecate AMD-specific MSR variants Now that all users of {rd,wr}msr_amd_safe have been fixed, deprecate its use by making them private to amd.c and adding warnings when used on anything else beside K8. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1338562358-28182-5-git-send-email-bp@amd64.org Acked-by: Konrad Rzeszutek Wilk Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr.h | 27 --------------------------- arch/x86/kernel/cpu/amd.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 81860cc012d1..cb33b5f00267 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -211,33 +211,6 @@ do { \ #endif /* !CONFIG_PARAVIRT */ -static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) -{ - u32 gprs[8] = { 0 }; - int err; - - gprs[1] = msr; - gprs[7] = 0x9c5a203a; - - err = rdmsr_safe_regs(gprs); - - *p = gprs[0] | ((u64)gprs[2] << 32); - - return err; -} - -static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) -{ - u32 gprs[8] = { 0 }; - - gprs[0] = (u32)val; - gprs[1] = msr; - gprs[2] = val >> 32; - gprs[7] = 0x9c5a203a; - - return wrmsr_safe_regs(gprs); -} - #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ (u32)((val) >> 32)) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 80ccd99542e6..c928eb26ada6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -19,6 +19,39 @@ #include "cpu.h" +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +{ + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + u32 gprs[8] = { 0 }; + int err; + + WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__); + + gprs[1] = msr; + gprs[7] = 0x9c5a203a; + + err = rdmsr_safe_regs(gprs); + + *p = gprs[0] | ((u64)gprs[2] << 32); + + return err; +} + +static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) +{ + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + u32 gprs[8] = { 0 }; + + WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__); + + gprs[0] = (u32)val; + gprs[1] = msr; + gprs[2] = val >> 32; + gprs[7] = 0x9c5a203a; + + return wrmsr_safe_regs(gprs); +} + #ifdef CONFIG_X86_32 /* * B step AMD K6 before B 9730xxxx have hardware bugs that can cause -- cgit v1.2.3 From 715c85b1fc824e9cd0ea07d6ceb80d2262f32e90 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 7 Jun 2012 13:32:04 -0700 Subject: x86, cpu: Rename checking_wrmsrl() to wrmsrl_safe() Rename checking_wrmsrl() to wrmsrl_safe(), to match the naming convention used by all the other MSR access functions/macros. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr.h | 2 +- arch/x86/kernel/cpu/amd.c | 4 ++-- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/cpu/perf_event_intel.c | 6 +++--- arch/x86/kernel/cpu/perf_event_p4.c | 14 +++++++------- arch/x86/kernel/cpu/perf_event_p6.c | 4 ++-- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/vdso/vdso32-setup.c | 6 +++--- 8 files changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index cb33b5f00267..fe83d74a920d 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -211,7 +211,7 @@ do { \ #endif /* !CONFIG_PARAVIRT */ -#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ +#define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val), \ (u32)((val) >> 32)) #define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2)) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c928eb26ada6..9d92e19039f0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -621,7 +621,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (!rdmsrl_safe(0xc0011005, &val)) { val |= 1ULL << 54; - checking_wrmsrl(0xc0011005, val); + wrmsrl_safe(0xc0011005, val); rdmsrl(0xc0011005, val); if (val & (1ULL << 54)) { set_cpu_cap(c, X86_FEATURE_TOPOEXT); @@ -712,7 +712,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); if (err == 0) { mask |= (1 << 10); - checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask); + wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); } } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e049d6da0183..4e3ba9cb5a4e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -222,7 +222,7 @@ static bool check_hw_exists(void) * that don't trap on the MSR access and always return 0s. */ val = 0xabcdUL; - ret = checking_wrmsrl(x86_pmu_event_addr(0), val); + ret = wrmsrl_safe(x86_pmu_event_addr(0), val); ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); if (ret || val != val_new) goto msr_fail; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 166546ec6aef..7789aa37c746 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1003,11 +1003,11 @@ static void intel_pmu_reset(void) printk("clearing PMU state on CPU#%d\n", smp_processor_id()); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); - checking_wrmsrl(x86_pmu_event_addr(idx), 0ull); + wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); + wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); } for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) - checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); + wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); if (ds) ds->bts_index = ds->bts_buffer_base; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 47124a73dd73..6c82e4037989 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void) * So at moment let leave metrics turned on forever -- it's * ok for now but need to be revisited! * - * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); - * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); + * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0); + * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0); */ } @@ -909,7 +909,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) * state we need to clear P4_CCCR_OVF, otherwise interrupt get * asserted again and again */ - (void)checking_wrmsrl(hwc->config_base, + (void)wrmsrl_safe(hwc->config_base, (u64)(p4_config_unpack_cccr(hwc->config)) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); } @@ -943,8 +943,8 @@ static void p4_pmu_enable_pebs(u64 config) bind = &p4_pebs_bind_map[idx]; - (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); - (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); + (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); + (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); } static void p4_pmu_enable_event(struct perf_event *event) @@ -978,8 +978,8 @@ static void p4_pmu_enable_event(struct perf_event *event) */ p4_pmu_enable_pebs(hwc->config); - (void)checking_wrmsrl(escr_addr, escr_conf); - (void)checking_wrmsrl(hwc->config_base, + (void)wrmsrl_safe(escr_addr, escr_conf); + (void)wrmsrl_safe(hwc->config_base, (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); } diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 32bcfc7dd230..e4dd0f7a0453 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -71,7 +71,7 @@ p6_pmu_disable_event(struct perf_event *event) if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)checking_wrmsrl(hwc->config_base, val); + (void)wrmsrl_safe(hwc->config_base, val); } static void p6_pmu_enable_event(struct perf_event *event) @@ -84,7 +84,7 @@ static void p6_pmu_enable_event(struct perf_event *event) if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)checking_wrmsrl(hwc->config_base, val); + (void)wrmsrl_safe(hwc->config_base, val); } PMU_FORMAT_ATTR(event, "config:0-7" ); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 61cdf7fdf099..3e215ba68766 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -466,7 +466,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) task->thread.gs = addr; if (doit) { load_gs_index(0); - ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); + ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); } } put_cpu(); @@ -494,7 +494,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) /* set the selector to 0 to not confuse __switch_to */ loadsegment(fs, 0); - ret = checking_wrmsrl(MSR_FS_BASE, addr); + ret = wrmsrl_safe(MSR_FS_BASE, addr); } } put_cpu(); diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 66e6d9359826..0faad646f5fd 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -205,9 +205,9 @@ void syscall32_cpu_init(void) { /* Load these always in case some future AMD CPU supports SYSENTER from compat mode too. */ - checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); - checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); wrmsrl(MSR_CSTAR, ia32_cstar_target); } -- cgit v1.2.3 From 9d8e10667624ea6411f04495aef1fa4a8a778ee8 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 7 Jun 2012 15:14:49 +0200 Subject: x86/apic: Factor out default vector_allocation_domain() operation Signed-off-by: Alexander Gordeev Acked-by: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120607131449.GC4759@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 21 +++++++++++++++++++++ arch/x86/kernel/apic/apic_flat_64.c | 22 +--------------------- arch/x86/kernel/apic/apic_noop.c | 3 +-- arch/x86/kernel/apic/apic_numachip.c | 8 +------- arch/x86/kernel/apic/bigsmp_32.c | 8 +------- arch/x86/kernel/apic/es7000_32.c | 19 ++----------------- arch/x86/kernel/apic/numaq_32.c | 16 +--------------- arch/x86/kernel/apic/probe_32.c | 17 +---------------- arch/x86/kernel/apic/summit_32.c | 16 +--------------- arch/x86/kernel/apic/x2apic_phys.c | 11 +---------- arch/x86/kernel/apic/x2apic_uv_x.c | 8 +------- 11 files changed, 32 insertions(+), 117 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bef571769e68..feb2dbdae9ec 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -615,6 +615,27 @@ extern unsigned int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask); +static inline void +flat_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_clear(retmask); + cpumask_bits(retmask)[0] = APIC_ALL_CPUS; +} + +static inline void +default_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + cpumask_copy(retmask, cpumask_of(cpu)); +} + static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) { return physid_isset(apicid, *map); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 55b97ce4fa19..bddc92566d0a 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -36,20 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 1; } -static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - /* * Set up the logical destination ID. * @@ -257,12 +243,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { default_send_IPI_mask_sequence_phys(cpumask, vector); @@ -309,7 +289,7 @@ static struct apic apic_physflat = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = physflat_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, /* not needed, but shouldn't hurt: */ .init_apic_ldr = flat_init_apic_ldr, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 7c3dd4fe0686..3e43cf528939 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -104,8 +104,7 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) { if (cpu != 0) pr_warning("APIC: Vector allocated for non-BSP cpu\n"); - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_copy(retmask, cpumask_of(cpu)); } static u32 noop_apic_read(u32 reg) diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index dba4bf2ed566..c028132ad358 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -72,12 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) return initial_apic_id >> index_msb; } -static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) { union numachip_csr_g3_ext_irq_gen int_gen; @@ -222,7 +216,7 @@ static struct apic apic_numachip __refconst = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = numachip_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 907aa3d112a6..df342fe4d6aa 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -142,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } /* NULL entry stops DMI scanning */ }; -static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static int probe_bigsmp(void) { if (def_to_bigsmp) @@ -176,7 +170,7 @@ static struct apic apic_bigsmp = { .check_apicid_used = bigsmp_check_apicid_used, .check_apicid_present = bigsmp_check_apicid_present, - .vector_allocation_domain = bigsmp_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index db4ab1be3c79..3c42865757e2 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -394,21 +394,6 @@ static void es7000_enable_apic_mode(void) WARN(1, "Command failed, status = %x\n", mip_status); } -static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - - static void es7000_wait_for_init_deassert(atomic_t *deassert) { while (!atomic_read(deassert)) @@ -638,7 +623,7 @@ static struct apic __refdata apic_es7000_cluster = { .check_apicid_used = es7000_check_apicid_used, .check_apicid_present = es7000_check_apicid_present, - .vector_allocation_domain = es7000_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = es7000_init_apic_ldr_cluster, .ioapic_phys_id_map = es7000_ioapic_phys_id_map, @@ -705,7 +690,7 @@ static struct apic __refdata apic_es7000 = { .check_apicid_used = es7000_check_apicid_used, .check_apicid_present = es7000_check_apicid_present, - .vector_allocation_domain = es7000_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = es7000_init_apic_ldr, .ioapic_phys_id_map = es7000_ioapic_phys_id_map, diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index f00a68cca37a..eb2d466fd81a 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -441,20 +441,6 @@ static int probe_numaq(void) return found_numaq; } -static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - static void numaq_setup_portio_remap(void) { int num_quads = num_online_nodes(); @@ -491,7 +477,7 @@ static struct apic __refdata apic_numaq = { .check_apicid_used = numaq_check_apicid_used, .check_apicid_present = numaq_check_apicid_present, - .vector_allocation_domain = numaq_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = numaq_init_apic_ldr, .ioapic_phys_id_map = numaq_ioapic_phys_id_map, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 71b6ac48ab26..2c6f003b2e4b 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -66,21 +66,6 @@ static void setup_apic_flat_routing(void) #endif } -static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* - * Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - /* should be called last. */ static int probe_default(void) { @@ -105,7 +90,7 @@ static struct apic apic_default = { .check_apicid_used = default_check_apicid_used, .check_apicid_present = default_check_apicid_present, - .vector_allocation_domain = default_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 659897c00755..35d254c1fec2 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -320,20 +320,6 @@ static int probe_summit(void) return 0; } -static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - #ifdef CONFIG_X86_SUMMIT_NUMA static struct rio_table_hdr *rio_table_hdr; static struct scal_detail *scal_devs[MAX_NUMNODES]; @@ -509,7 +495,7 @@ static struct apic apic_summit = { .check_apicid_used = summit_check_apicid_used, .check_apicid_present = summit_check_apicid_present, - .vector_allocation_domain = summit_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = summit_init_apic_ldr, .ioapic_phys_id_map = summit_ioapic_phys_id_map, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index f730269edef2..f109388a0e80 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -88,15 +88,6 @@ static int x2apic_phys_probe(void) return apic == &apic_x2apic_phys; } -/* - * Each logical cpu is in its own vector allocation domain. - */ -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static struct apic apic_x2apic_phys = { .name = "physical x2apic", @@ -114,7 +105,7 @@ static struct apic apic_x2apic_phys = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = x2apic_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 16efb92bfea5..df89a7d78748 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -185,12 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); -static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { #ifdef CONFIG_SMP @@ -363,7 +357,7 @@ static struct apic __refdata apic_x2apic_uv_x = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = uv_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, .ioapic_phys_id_map = NULL, -- cgit v1.2.3 From 1bccd58bfffc5a677051937b332b71f0686187c1 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 7 Jun 2012 15:15:15 +0200 Subject: x86/apic: Try to spread IRQ vectors to different priority levels When assigning a new vector it is primarially done by adding 8 to the previously given out vector number. Hence, two consequently allocated vector numbers would likely fall into the same priority level. Try to spread vector numbers to different priority levels better by changing the step from 8 to 16. Signed-off-by: Alexander Gordeev Acked-by: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120607131514.GD4759@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 74c569791e75..05af3d341aaa 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1112,7 +1112,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; - static int current_offset = VECTOR_OFFSET_START % 8; + static int current_offset = VECTOR_OFFSET_START % 16; unsigned int old_vector; int cpu, err; cpumask_var_t tmp_mask; @@ -1148,10 +1148,9 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) vector = current_vector; offset = current_offset; next: - vector += 8; + vector += 16; if (vector >= first_system_vector) { - /* If out of vectors on large boxen, must share them. */ - offset = (offset + 1) % 8; + offset = (offset + 1) % 16; vector = FIRST_EXTERNAL_VECTOR + offset; } if (unlikely(current_vector == vector)) -- cgit v1.2.3 From 8637e38aff14d048b649075114023023a2e80fba Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 7 Jun 2012 15:15:44 +0200 Subject: x86/apic: Avoid useless scanning thru a cpumask in assign_irq_vector() In case of static vector allocation domains (i.e. flat) if all vector numbers are exhausted, an attempt to assign a new vector will lead to useless scans through all CPUs in the cpumask, even though it is known that each new pass would fail. Make this corner case less painful by letting report whether the vector allocation domain depends on passed arguments or not and stop scanning early. The same could have been achived by introducing a static flag to the apic operations. But let's allow vector_allocation_domain() have more intelligence here and decide dynamically, in case we would need it in the future. Signed-off-by: Alexander Gordeev Acked-by: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120607131542.GE4759@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 8 +++++--- arch/x86/kernel/apic/apic_noop.c | 3 ++- arch/x86/kernel/apic/io_apic.c | 12 +++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index feb2dbdae9ec..e3fecd50d5ca 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -306,7 +306,7 @@ struct apic { unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); unsigned long (*check_apicid_present)(int apicid); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + bool (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); @@ -615,7 +615,7 @@ extern unsigned int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask); -static inline void +static inline bool flat_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus @@ -628,12 +628,14 @@ flat_vector_allocation_domain(int cpu, struct cpumask *retmask) */ cpumask_clear(retmask); cpumask_bits(retmask)[0] = APIC_ALL_CPUS; + return false; } -static inline void +static inline bool default_vector_allocation_domain(int cpu, struct cpumask *retmask) { cpumask_copy(retmask, cpumask_of(cpu)); + return true; } static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 3e43cf528939..ac9edf247b15 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -100,11 +100,12 @@ static unsigned long noop_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +static bool noop_vector_allocation_domain(int cpu, struct cpumask *retmask) { if (cpu != 0) pr_warning("APIC: Vector allocated for non-BSP cpu\n"); cpumask_copy(retmask, cpumask_of(cpu)); + return true; } static u32 noop_apic_read(u32 reg) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 05af3d341aaa..4061a7dee5c9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1137,8 +1137,9 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) for_each_cpu_and(cpu, mask, cpu_online_mask) { int new_cpu; int vector, offset; + bool more_domains; - apic->vector_allocation_domain(cpu, tmp_mask); + more_domains = apic->vector_allocation_domain(cpu, tmp_mask); if (cpumask_subset(tmp_mask, cfg->domain)) { free_cpumask_var(tmp_mask); @@ -1153,8 +1154,13 @@ next: offset = (offset + 1) % 16; vector = FIRST_EXTERNAL_VECTOR + offset; } - if (unlikely(current_vector == vector)) - continue; + + if (unlikely(current_vector == vector)) { + if (more_domains) + continue; + else + break; + } if (test_bit(vector, used_vectors)) goto next; -- cgit v1.2.3 From ff164324123c0fe181d8de7dadcc7b3fbe25f2cf Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 7 Jun 2012 15:15:59 +0200 Subject: x86/apic: Make cpu_mask_to_apicid() operations return error code Current cpu_mask_to_apicid() and cpu_mask_to_apicid_and() implementations have few shortcomings: 1. A value returned by cpu_mask_to_apicid() is written to hardware registers unconditionally. Should BAD_APICID get ever returned it will be written to a hardware too. But the value of BAD_APICID is not universal across all hardware in all modes and might cause unexpected results, i.e. interrupts might get routed to CPUs that are not configured to receive it. 2. Because the value of BAD_APICID is not universal it is counter- intuitive to return it for a hardware where it does not make sense (i.e. x2apic). 3. cpu_mask_to_apicid_and() operation is thought as an complement to cpu_mask_to_apicid() that only applies a AND mask on top of a cpumask being passed. Yet, as consequence of 18374d8 commit the two operations are inconsistent in that of: cpu_mask_to_apicid() should not get a offline CPU with the cpumask cpu_mask_to_apicid_and() should not fail and return BAD_APICID These limitations are impossible to realize just from looking at the operations prototypes. Most of these shortcomings are resolved by returning a error code instead of BAD_APICID. As the result, faults are reported back early rather than possibilities to cause a unexpected behaviour exist (in case of [1]). The only exception is setup_timer_IRQ0_pin() routine. Although obviously controversial to this fix, its existing behaviour is preserved to not break the fragile check_timer() and would better addressed in a separate fix. Signed-off-by: Alexander Gordeev Acked-by: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120607131559.GF4759@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 44 ++++++++++++------ arch/x86/kernel/apic/apic.c | 33 +++++++------ arch/x86/kernel/apic/es7000_32.c | 21 +++++---- arch/x86/kernel/apic/io_apic.c | 88 +++++++++++++++++++++++------------ arch/x86/kernel/apic/numaq_32.c | 14 ++++-- arch/x86/kernel/apic/summit_32.c | 22 +++++---- arch/x86/kernel/apic/x2apic_cluster.c | 24 ++++++---- arch/x86/kernel/apic/x2apic_uv_x.c | 27 +++++++---- arch/x86/platform/uv/uv_irq.c | 7 ++- drivers/iommu/intel_irq_remapping.c | 13 ++++-- 10 files changed, 188 insertions(+), 105 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e3fecd50d5ca..ae91f9c7e360 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -331,9 +331,11 @@ struct apic { unsigned long (*set_apic_id)(unsigned int id); unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); + int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, + unsigned int *apicid); + int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid); /* ipi */ void (*send_IPI_mask)(const struct cpumask *mask, int vector); @@ -591,29 +593,45 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) #endif -static inline unsigned int -flat_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int +__flat_cpu_mask_to_apicid(unsigned long cpu_mask, unsigned int *apicid) { - return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; + cpu_mask &= APIC_ALL_CPUS; + if (likely(cpu_mask)) { + *apicid = (unsigned int)cpu_mask; + return 0; + } else { + return -EINVAL; + } } -static inline unsigned int +static inline int +flat_cpu_mask_to_apicid(const struct cpumask *cpumask, + unsigned int *apicid) +{ + return __flat_cpu_mask_to_apicid(cpumask_bits(cpumask)[0], apicid); +} + +static inline int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid) { unsigned long mask1 = cpumask_bits(cpumask)[0]; unsigned long mask2 = cpumask_bits(andmask)[0]; unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; - return (unsigned int)(mask1 & mask2 & mask3); + return __flat_cpu_mask_to_apicid(mask1 & mask2 & mask3, apicid); } -extern unsigned int -default_cpu_mask_to_apicid(const struct cpumask *cpumask); +extern int +default_cpu_mask_to_apicid(const struct cpumask *cpumask, + unsigned int *apicid); -extern unsigned int +extern int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask); + const struct cpumask *andmask, + unsigned int *apicid); static inline bool flat_vector_allocation_domain(int cpu, struct cpumask *retmask) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 96a2608252f1..b8d92606f84f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2123,24 +2123,26 @@ void default_init_apic_ldr(void) apic_write(APIC_LDR, val); } -unsigned int default_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int __default_cpu_to_apicid(int cpu, unsigned int *apicid) { - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - cpu = cpumask_first(cpumask); - if (likely((unsigned)cpu < nr_cpu_ids)) - return per_cpu(x86_cpu_to_apicid, cpu); + if (likely((unsigned int)cpu < nr_cpu_ids)) { + *apicid = per_cpu(x86_cpu_to_apicid, cpu); + return 0; + } else { + return -EINVAL; + } +} - return BAD_APICID; +int default_cpu_mask_to_apicid(const struct cpumask *cpumask, + unsigned int *apicid) +{ + int cpu = cpumask_first(cpumask); + return __default_cpu_to_apicid(cpu, apicid); } -unsigned int -default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid) { int cpu; @@ -2148,7 +2150,8 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - return per_cpu(x86_cpu_to_apicid, cpu); + + return __default_cpu_to_apicid(cpu, apicid); } /* diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 3c42865757e2..515ebb00a9fc 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -525,7 +525,8 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid) return 1; } -static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) +static int +es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) { unsigned int round = 0; int cpu, uninitialized_var(apicid); @@ -539,31 +540,33 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { WARN(1, "Not a valid mask!"); - return BAD_APICID; + return -EINVAL; } apicid = new_apicid; round++; } - return apicid; + *dest_id = apicid; + return 0; } -static unsigned int +static int es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid) { - int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); + *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); cpumask_var_t cpumask; if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; + return 0; cpumask_and(cpumask, inmask, andmask); cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = es7000_cpu_mask_to_apicid(cpumask); + es7000_cpu_mask_to_apicid(cpumask, apicid); free_cpumask_var(cpumask); - return apicid; + return 0; } static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4061a7dee5c9..0deb773404e5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1359,7 +1359,14 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; - dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(), + &dest)) { + pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", + mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); + __clear_irq_vector(irq, cfg); + + return; + } apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1474,6 +1481,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, unsigned int pin, int vector) { struct IO_APIC_route_entry entry; + unsigned int dest; if (irq_remapping_enabled) return; @@ -1484,9 +1492,12 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, * We use logical delivery to get the timer IRQ * to the first CPU. */ + if (unlikely(apic->cpu_mask_to_apicid(apic->target_cpus(), &dest))) + dest = BAD_APICID; + entry.dest_mode = apic->irq_dest_mode; entry.mask = 0; /* don't mask IRQ for edge */ - entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); + entry.dest = dest; entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; @@ -2245,16 +2256,25 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, unsigned int *dest_id) { struct irq_cfg *cfg = data->chip_data; + unsigned int irq = data->irq; + int err; if (!cpumask_intersects(mask, cpu_online_mask)) - return -1; + return -EINVAL; - if (assign_irq_vector(data->irq, data->chip_data, mask)) - return -1; + err = assign_irq_vector(irq, cfg, mask); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } cpumask_copy(data->affinity, mask); - *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain); return 0; } @@ -3040,7 +3060,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, if (err) return err; - dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; if (irq_remapped(cfg)) { compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); @@ -3361,6 +3384,8 @@ static struct irq_chip ht_irq_chip = { int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { struct irq_cfg *cfg; + struct ht_irq_msg msg; + unsigned dest; int err; if (disable_apic) @@ -3368,36 +3393,37 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (!err) { - struct ht_irq_msg msg; - unsigned dest; + if (err) + return err; - dest = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus()); + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((apic->irq_dest_mode == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; + msg.address_lo = + HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_VECTOR(cfg->vector) | + ((apic->irq_dest_mode == 0) ? + HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | + HT_IRQ_LOW_RQEOI_EDGE | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + HT_IRQ_LOW_MT_FIXED : + HT_IRQ_LOW_MT_ARBITRATED) | + HT_IRQ_LOW_IRQ_MASKED; - write_ht_irq_msg(irq, &msg); + write_ht_irq_msg(irq, &msg); - irq_set_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); + irq_set_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); - dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); - } - return err; + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); + + return 0; } #endif /* CONFIG_HT_IRQ */ diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index eb2d466fd81a..2b55514c328b 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -406,16 +406,20 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) +static int +numaq_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid) { - return 0x0F; + *apicid = 0x0F; + return 0; } -static inline unsigned int +static int numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid) { - return 0x0F; + *apicid = 0x0F; + return 0; } /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 35d254c1fec2..5766d84f12d6 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -263,7 +263,8 @@ static int summit_check_phys_apicid_present(int physical_apicid) return 1; } -static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) +static int +summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) { unsigned int round = 0; int cpu, apicid = 0; @@ -276,30 +277,33 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { printk("%s: Not a valid mask!\n", __func__); - return BAD_APICID; + return -EINVAL; } apicid |= new_apicid; round++; } - return apicid; + *dest_id = apicid; + return 0; } -static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) +static int +summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask, + unsigned int *apicid) { - int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); + *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); cpumask_var_t cpumask; if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; + return 0; cpumask_and(cpumask, inmask, andmask); cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = summit_cpu_mask_to_apicid(cpumask); + summit_cpu_mask_to_apicid(cpumask, apicid); free_cpumask_var(cpumask); - return apicid; + return 0; } /* diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 612622c47dfb..5f86f79335f4 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -96,24 +96,26 @@ static void x2apic_send_IPI_all(int vector) __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static int +x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid) { int cpu = cpumask_first(cpumask); - u32 dest = 0; int i; - if (cpu > nr_cpu_ids) - return BAD_APICID; + if (cpu >= nr_cpu_ids) + return -EINVAL; + *apicid = 0; for_each_cpu_and(i, cpumask, per_cpu(cpus_in_cluster, cpu)) - dest |= per_cpu(x86_cpu_to_logical_apicid, i); + *apicid |= per_cpu(x86_cpu_to_logical_apicid, i); - return dest; + return 0; } -static unsigned int +static int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid) { u32 dest = 0; u16 cluster; @@ -128,7 +130,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, } if (!dest) - return BAD_APICID; + return -EINVAL; for_each_cpu_and(i, cpumask, andmask) { if (!cpumask_test_cpu(i, cpu_online_mask)) @@ -138,7 +140,9 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, dest |= per_cpu(x86_cpu_to_logical_apicid, i); } - return dest; + *apicid = dest; + + return 0; } static void init_x2apic_ldr(void) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index df89a7d78748..2f3030fef31e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -269,23 +269,31 @@ static void uv_init_apic_ldr(void) { } -static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int __uv_cpu_to_apicid(int cpu, unsigned int *apicid) +{ + if (likely((unsigned int)cpu < nr_cpu_ids)) { + *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; + return 0; + } else { + return -EINVAL; + } +} + +static int +uv_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid) { /* * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ int cpu = cpumask_first(cpumask); - - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; - else - return BAD_APICID; + return __uv_cpu_to_apicid(cpu, apicid); } -static unsigned int +static int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid) { int cpu; @@ -297,7 +305,8 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; + + return __uv_cpu_to_apicid(cpu, apicid); } static unsigned int x2apic_get_apic_id(unsigned long x) diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index f25c2765a5c9..dd1ff39a464c 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -135,6 +135,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; int mmr_pnode, err; + unsigned int dest; BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@ -143,6 +144,10 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, if (err != 0) return err; + err = apic->cpu_mask_to_apicid(eligible_cpu, &dest); + if (err != 0) + return err; + if (limit == UV_AFFINITY_CPU) irq_set_status_flags(irq, IRQ_NO_BALANCING); else @@ -159,7 +164,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + entry->dest = dest; mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 6d347064b8b0..dafbad06390a 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -924,6 +924,7 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_cfg *cfg = data->chip_data; unsigned int dest, irq = data->irq; struct irte irte; + int err; if (!cpumask_intersects(mask, cpu_online_mask)) return -EINVAL; @@ -931,10 +932,16 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, if (get_irte(irq, &irte)) return -EBUSY; - if (assign_irq_vector(irq, cfg, mask)) - return -EBUSY; + err = assign_irq_vector(irq, cfg, mask); + if (err) + return err; - dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); + err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)); + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); -- cgit v1.2.3 From 4988a40c3981212fa8c64da68722affc1cb6697a Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 7 Jun 2012 15:16:25 +0200 Subject: x86/apic: Make cpu_mask_to_apicid() operations check cpu_online_mask Currently cpu_mask_to_apicid() should not get a offline CPU with the cpumask. Otherwise some apic drivers might try to access non-existent per-cpu variables (i.e. x2apic). In that regard cpu_mask_to_apicid() and cpu_mask_to_apicid_and() operations are inconsistent. This fix makes the two operations do not rely on calling functions and always return the apicid for only online CPUs. As result, the meaning and implementations of cpu_mask_to_apicid() and cpu_mask_to_apicid_and() operations become straight. Signed-off-by: Alexander Gordeev Acked-by: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120607131624.GG4759@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 6 ++---- arch/x86/kernel/apic/apic.c | 2 +- arch/x86/kernel/apic/es7000_32.c | 3 +-- arch/x86/kernel/apic/summit_32.c | 3 +-- arch/x86/kernel/apic/x2apic_cluster.c | 2 +- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- 6 files changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index ae91f9c7e360..1ed3eead2039 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -596,7 +596,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) static inline int __flat_cpu_mask_to_apicid(unsigned long cpu_mask, unsigned int *apicid) { - cpu_mask &= APIC_ALL_CPUS; + cpu_mask = cpu_mask & APIC_ALL_CPUS & cpumask_bits(cpu_online_mask)[0]; if (likely(cpu_mask)) { *apicid = (unsigned int)cpu_mask; return 0; @@ -619,9 +619,7 @@ flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, { unsigned long mask1 = cpumask_bits(cpumask)[0]; unsigned long mask2 = cpumask_bits(andmask)[0]; - unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; - - return __flat_cpu_mask_to_apicid(mask1 & mask2 & mask3, apicid); + return __flat_cpu_mask_to_apicid(mask1 & mask2, apicid); } extern int diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b8d92606f84f..7e9bbe73bc5a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2136,7 +2136,7 @@ static inline int __default_cpu_to_apicid(int cpu, unsigned int *apicid) int default_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid) { - int cpu = cpumask_first(cpumask); + int cpu = cpumask_first_and(cpumask, cpu_online_mask); return __default_cpu_to_apicid(cpu, apicid); } diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 515ebb00a9fc..b35cfb9b6962 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -534,7 +534,7 @@ es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) /* * The cpus in the mask must all be on the apic cluster. */ - for_each_cpu(cpu, cpumask) { + for_each_cpu_and(cpu, cpumask, cpu_online_mask) { int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { @@ -561,7 +561,6 @@ es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, return 0; cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); es7000_cpu_mask_to_apicid(cpumask, apicid); free_cpumask_var(cpumask); diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 5766d84f12d6..79d360f6729e 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -272,7 +272,7 @@ summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) /* * The cpus in the mask must all be on the apic cluster. */ - for_each_cpu(cpu, cpumask) { + for_each_cpu_and(cpu, cpumask, cpu_online_mask) { int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { @@ -298,7 +298,6 @@ summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, return 0; cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); summit_cpu_mask_to_apicid(cpumask, apicid); free_cpumask_var(cpumask); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 5f86f79335f4..23a46cf5b6fd 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -99,7 +99,7 @@ static void x2apic_send_IPI_all(int vector) static int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid) { - int cpu = cpumask_first(cpumask); + int cpu = cpumask_first_and(cpumask, cpu_online_mask); int i; if (cpu >= nr_cpu_ids) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 2f3030fef31e..307aa076bd62 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -286,7 +286,7 @@ uv_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - int cpu = cpumask_first(cpumask); + int cpu = cpumask_first_and(cpumask, cpu_online_mask); return __uv_cpu_to_apicid(cpu, apicid); } -- cgit v1.2.3 From bf947fcb77ff858f223c49c76e2d130095fa2585 Mon Sep 17 00:00:00 2001 From: Donald Dutile Date: Mon, 4 Jun 2012 17:29:01 -0400 Subject: iommu/dmar: Replace printks with appropriate pr_*() Just some cleanup so next patch can keep the info printing the same way throughout the file. Replace printk(KERN_* with pr_*() functions. Signed-off-by: Donald Dutile Cc: iommu@lists.linux-foundation.org Cc: chrisw@redhat.com Cc: suresh.b.siddha@intel.com Cc: dwmw2@infradead.org Link: http://lkml.kernel.org/r/1338845342-12464-2-git-send-email-ddutile@redhat.com Signed-off-by: Ingo Molnar --- drivers/iommu/dmar.c | 83 ++++++++++++++++++++++------------------------------ 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 3a74e4410fc0..1e5a10de3471 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -83,15 +83,14 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, * ignore it */ if (!bus) { - printk(KERN_WARNING - PREFIX "Device scope bus [%d] not found\n", - scope->bus); + pr_warn(PREFIX "Device scope bus [%d] not found\n", + scope->bus); break; } pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn)); if (!pdev) { - printk(KERN_WARNING PREFIX - "Device scope device [%04x:%02x:%02x.%02x] not found\n", + pr_warn(PREFIX "Device scope device" + "[%04x:%02x:%02x.%02x] not found\n", segment, bus->number, path->dev, path->fn); break; } @@ -100,9 +99,9 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, bus = pdev->subordinate; } if (!pdev) { - printk(KERN_WARNING PREFIX - "Device scope device [%04x:%02x:%02x.%02x] not found\n", - segment, scope->bus, path->dev, path->fn); + pr_warn(PREFIX + "Device scope device [%04x:%02x:%02x.%02x] not found\n", + segment, scope->bus, path->dev, path->fn); *dev = NULL; return 0; } @@ -110,8 +109,7 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, pdev->subordinate) || (scope->entry_type == \ ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) { pci_dev_put(pdev); - printk(KERN_WARNING PREFIX - "Device scope type does not match for %s\n", + pr_warn(PREFIX "Device scope type does not match for %s\n", pci_name(pdev)); return -EINVAL; } @@ -134,8 +132,7 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) (*cnt)++; else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) { - printk(KERN_WARNING PREFIX - "Unsupported device scope\n"); + pr_warn(PREFIX "Unsupported device scope\n"); } start += scope->length; } @@ -261,25 +258,23 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) case ACPI_DMAR_TYPE_HARDWARE_UNIT: drhd = container_of(header, struct acpi_dmar_hardware_unit, header); - printk (KERN_INFO PREFIX - "DRHD base: %#016Lx flags: %#x\n", + pr_info(PREFIX "DRHD base: %#016Lx flags: %#x\n", (unsigned long long)drhd->address, drhd->flags); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: rmrr = container_of(header, struct acpi_dmar_reserved_memory, header); - printk (KERN_INFO PREFIX - "RMRR base: %#016Lx end: %#016Lx\n", + pr_info(PREFIX "RMRR base: %#016Lx end: %#016Lx\n", (unsigned long long)rmrr->base_address, (unsigned long long)rmrr->end_address); break; case ACPI_DMAR_TYPE_ATSR: atsr = container_of(header, struct acpi_dmar_atsr, header); - printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags); + pr_info(PREFIX "ATSR flags: %#x\n", atsr->flags); break; case ACPI_DMAR_HARDWARE_AFFINITY: rhsa = container_of(header, struct acpi_dmar_rhsa, header); - printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n", + pr_info(PREFIX "RHSA base: %#016Lx proximity domain: %#x\n", (unsigned long long)rhsa->base_address, rhsa->proximity_domain); break; @@ -299,7 +294,7 @@ static int __init dmar_table_detect(void) &dmar_tbl_size); if (ACPI_SUCCESS(status) && !dmar_tbl) { - printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); + pr_warn(PREFIX "Unable to map DMAR\n"); status = AE_NOT_FOUND; } @@ -333,20 +328,18 @@ parse_dmar_table(void) return -ENODEV; if (dmar->width < PAGE_SHIFT - 1) { - printk(KERN_WARNING PREFIX "Invalid DMAR haw\n"); + pr_warn(PREFIX "Invalid DMAR haw\n"); return -EINVAL; } - printk (KERN_INFO PREFIX "Host address width %d\n", - dmar->width + 1); + pr_info(PREFIX "Host address width %d\n", dmar->width + 1); entry_header = (struct acpi_dmar_header *)(dmar + 1); while (((unsigned long)entry_header) < (((unsigned long)dmar) + dmar_tbl->length)) { /* Avoid looping forever on bad ACPI tables */ if (entry_header->length == 0) { - printk(KERN_WARNING PREFIX - "Invalid 0-length structure\n"); + pr_warn(PREFIX "Invalid 0-length structure\n"); ret = -EINVAL; break; } @@ -369,8 +362,7 @@ parse_dmar_table(void) #endif break; default: - printk(KERN_WARNING PREFIX - "Unknown DMAR structure type %d\n", + pr_warn(PREFIX "Unknown DMAR structure type %d\n", entry_header->type); ret = 0; /* for forward compatibility */ break; @@ -469,12 +461,12 @@ int __init dmar_table_init(void) ret = parse_dmar_table(); if (ret) { if (ret != -ENODEV) - printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); + pr_info(PREFIX "parse DMAR table failure.\n"); return ret; } if (list_empty(&dmar_drhd_units)) { - printk(KERN_INFO PREFIX "No DMAR devices found\n"); + pr_info(PREFIX "No DMAR devices found\n"); return -ENODEV; } @@ -506,8 +498,7 @@ int __init check_zero_address(void) (((unsigned long)dmar) + dmar_tbl->length)) { /* Avoid looping forever on bad ACPI tables */ if (entry_header->length == 0) { - printk(KERN_WARNING PREFIX - "Invalid 0-length structure\n"); + pr_warn(PREFIX "Invalid 0-length structure\n"); return 0; } @@ -558,8 +549,8 @@ int __init detect_intel_iommu(void) if (ret && irq_remapping_enabled && cpu_has_x2apic && dmar->flags & 0x1) - printk(KERN_INFO - "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); + pr_info("Queued invalidation will be enabled to " + "support x2apic and Intr-remapping.\n"); if (ret && !no_iommu && !iommu_detected && !dmar_disabled) { iommu_detected = 1; @@ -602,7 +593,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); + pr_err("IOMMU: can't map the region\n"); goto error; } iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); @@ -615,15 +606,13 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { - printk(KERN_ERR - "Cannot get a valid agaw for iommu (seq_id = %d)\n", - iommu->seq_id); + pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n", + iommu->seq_id); goto err_unmap; } msagaw = iommu_calculate_max_sagaw(iommu); if (msagaw < 0) { - printk(KERN_ERR - "Cannot get a valid max agaw for iommu (seq_id = %d)\n", + pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n", iommu->seq_id); goto err_unmap; } @@ -640,7 +629,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iounmap(iommu->reg); iommu->reg = ioremap(drhd->reg_base_addr, map_size); if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); + pr_err("IOMMU: can't map the region\n"); goto error; } } @@ -710,7 +699,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) if (fault & DMA_FSTS_IQE) { head = readl(iommu->reg + DMAR_IQH_REG); if ((head >> DMAR_IQ_SHIFT) == index) { - printk(KERN_ERR "VT-d detected invalid descriptor: " + pr_err("VT-d detected invalid descriptor: " "low=%llx, high=%llx\n", (unsigned long long)qi->desc[index].low, (unsigned long long)qi->desc[index].high); @@ -1129,15 +1118,14 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, reason = dmar_get_fault_reason(fault_reason, &fault_type); if (fault_type == INTR_REMAP) - printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] " + pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] " "fault index %llx\n" "INTR-REMAP:[fault reason %02d] %s\n", (source_id >> 8), PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr >> 48, fault_reason, reason); else - printk(KERN_ERR - "DMAR:[%s] Request device [%02x:%02x.%d] " + pr_err("DMAR:[%s] Request device [%02x:%02x.%d] " "fault addr %llx \n" "DMAR:[fault reason %02d] %s\n", (type ? "DMA Read" : "DMA Write"), @@ -1157,8 +1145,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) raw_spin_lock_irqsave(&iommu->register_lock, flag); fault_status = readl(iommu->reg + DMAR_FSTS_REG); if (fault_status) - printk(KERN_ERR "DRHD: handling fault status reg %x\n", - fault_status); + pr_err("DRHD: handling fault status reg %x\n", fault_status); /* TBD: ignore advanced fault log currently */ if (!(fault_status & DMA_FSTS_PPF)) @@ -1224,7 +1211,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu) irq = create_irq(); if (!irq) { - printk(KERN_ERR "IOMMU: no free vectors\n"); + pr_err("IOMMU: no free vectors\n"); return -EINVAL; } @@ -1241,7 +1228,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu) ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu); if (ret) - printk(KERN_ERR "IOMMU: can't request irq\n"); + pr_err("IOMMU: can't request irq\n"); return ret; } @@ -1258,7 +1245,7 @@ int __init enable_drhd_fault_handling(void) ret = dmar_set_interrupt(iommu); if (ret) { - printk(KERN_ERR "DRHD %Lx: failed to enable fault, " + pr_err("DRHD %Lx: failed to enable fault, " " interrupt, ret %d\n", (unsigned long long)drhd->reg_base_addr, ret); return -1; -- cgit v1.2.3 From 6f5cf52114dd87f9ed091678f7dfc8ff21bbe2b3 Mon Sep 17 00:00:00 2001 From: Donald Dutile Date: Mon, 4 Jun 2012 17:29:02 -0400 Subject: iommu/dmar: Reserve mmio space used by the IOMMU, if the BIOS forgets to Intel-iommu initialization doesn't currently reserve the memory used for the IOMMU registers. This can allow the pci resource allocator to assign a device BAR to the same address as the IOMMU registers. This can cause some not so nice side affects when the driver ioremap's that region. Introduced two helper functions to map & unmap the IOMMU registers as well as simplify the init and exit paths. Signed-off-by: Donald Dutile Acked-by: Chris Wright Cc: iommu@lists.linux-foundation.org Cc: suresh.b.siddha@intel.com Cc: dwmw2@infradead.org Link: http://lkml.kernel.org/r/1338845342-12464-3-git-send-email-ddutile@redhat.com Signed-off-by: Ingo Molnar --- drivers/iommu/dmar.c | 111 +++++++++++++++++++++++++++++++++----------- include/linux/intel-iommu.h | 2 + 2 files changed, 86 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 1e5a10de3471..9ab6ebf46f7a 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -570,14 +570,89 @@ int __init detect_intel_iommu(void) } +static void unmap_iommu(struct intel_iommu *iommu) +{ + iounmap(iommu->reg); + release_mem_region(iommu->reg_phys, iommu->reg_size); +} + +/** + * map_iommu: map the iommu's registers + * @iommu: the iommu to map + * @phys_addr: the physical address of the base resgister + * + * Memory map the iommu's registers. Start w/ a single page, and + * possibly expand if that turns out to be insufficent. + */ +static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) +{ + int map_size, err=0; + + iommu->reg_phys = phys_addr; + iommu->reg_size = VTD_PAGE_SIZE; + + if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) { + pr_err("IOMMU: can't reserve memory\n"); + err = -EBUSY; + goto out; + } + + iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); + if (!iommu->reg) { + pr_err("IOMMU: can't map the region\n"); + err = -ENOMEM; + goto release; + } + + iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); + iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + + if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { + err = -EINVAL; + warn_invalid_dmar(phys_addr, " returns all ones"); + goto unmap; + } + + /* the registers might be more than one page */ + map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), + cap_max_fault_reg_offset(iommu->cap)); + map_size = VTD_PAGE_ALIGN(map_size); + if (map_size > iommu->reg_size) { + iounmap(iommu->reg); + release_mem_region(iommu->reg_phys, iommu->reg_size); + iommu->reg_size = map_size; + if (!request_mem_region(iommu->reg_phys, iommu->reg_size, + iommu->name)) { + pr_err("IOMMU: can't reserve memory\n"); + err = -EBUSY; + goto out; + } + iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size); + if (!iommu->reg) { + pr_err("IOMMU: can't map the region\n"); + err = -ENOMEM; + goto release; + } + } + err = 0; + goto out; + +unmap: + iounmap(iommu->reg); +release: + release_mem_region(iommu->reg_phys, iommu->reg_size); +out: + return err; +} + int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; - int map_size; u32 ver; static int iommu_allocated = 0; int agaw = 0; int msagaw = 0; + int err; if (!drhd->reg_base_addr) { warn_invalid_dmar(0, ""); @@ -591,19 +666,13 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->seq_id = iommu_allocated++; sprintf (iommu->name, "dmar%d", iommu->seq_id); - iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); - if (!iommu->reg) { - pr_err("IOMMU: can't map the region\n"); + err = map_iommu(iommu, drhd->reg_base_addr); + if (err) { + pr_err("IOMMU: failed to map %s\n", iommu->name); goto error; } - iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); - iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); - - if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { - warn_invalid_dmar(drhd->reg_base_addr, " returns all ones"); - goto err_unmap; - } + err = -EINVAL; agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n", @@ -621,19 +690,6 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->node = -1; - /* the registers might be more than one page */ - map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), - cap_max_fault_reg_offset(iommu->cap)); - map_size = VTD_PAGE_ALIGN(map_size); - if (map_size > VTD_PAGE_SIZE) { - iounmap(iommu->reg); - iommu->reg = ioremap(drhd->reg_base_addr, map_size); - if (!iommu->reg) { - pr_err("IOMMU: can't map the region\n"); - goto error; - } - } - ver = readl(iommu->reg + DMAR_VER_REG); pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", iommu->seq_id, @@ -648,10 +704,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) return 0; err_unmap: - iounmap(iommu->reg); + unmap_iommu(iommu); error: kfree(iommu); - return -1; + return err; } void free_iommu(struct intel_iommu *iommu) @@ -662,7 +718,8 @@ void free_iommu(struct intel_iommu *iommu) free_dmar_iommu(iommu); if (iommu->reg) - iounmap(iommu->reg); + unmap_iommu(iommu); + kfree(iommu); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e6ca56de9936..78e2ada50cd5 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -308,6 +308,8 @@ enum { struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ + u64 reg_phys; /* physical address of hw register set */ + u64 reg_size; /* size of hw register set */ u64 cap; u64 ecap; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ -- cgit v1.2.3 From 7eb9ba5ed312ec6ed9d22259c5da1acb7cf4bd29 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Fri, 8 Jun 2012 15:02:57 +0530 Subject: uprobes: Pass probed vaddr to arch_uprobe_analyze_insn() On RISC architectures like powerpc, instructions are fixed size. Instruction analysis on such platforms is just a matter of (insn % 4). Pass the vaddr at which the uprobe is to be inserted so that arch_uprobe_analyze_insn() can flag misaligned registration requests. Signed-off-by: Ananth N Mavinakaynahalli Cc: michael@ellerman.id.au Cc: antonb@thinktux.localdomain Cc: Paul Mackerras Cc: benh@kernel.crashing.org Cc: peterz@infradead.org Cc: Srikar Dronamraju Cc: Jim Keniston Cc: oleg@redhat.com Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20120608093257.GG13409@in.ibm.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uprobes.h | 2 +- arch/x86/kernel/uprobes.c | 3 ++- kernel/events/uprobes.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 1e9bed14f7ae..f3971bbcd1de 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -48,7 +48,7 @@ struct arch_uprobe_task { #endif }; -extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm); +extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index dc4e910a7d96..36fd42091fa7 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -409,9 +409,10 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. * @mm: the probed address space. * @arch_uprobe: the probepoint information. + * @addr: virtual address at which to install the probepoint * Return 0 on success or a -ve number on error. */ -int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm) +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) { int ret; struct insn insn; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 8c5e043cd309..b52376d02332 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -706,7 +706,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) return -EEXIST; - ret = arch_uprobe_analyze_insn(&uprobe->arch, mm); + ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, addr); if (ret) return ret; -- cgit v1.2.3 From c7d65a78fc18ed70353baeb7497ec71a7c775ac5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 7 Jun 2012 11:03:00 -0400 Subject: x86: Remove cmpxchg from i386 NMI nesting code I've been informed by someone on LWN called 'slashdot' that some i386 machines do not support a true cmpxchg. The cmpxchg used by the i386 NMI nesting code must be a true cmpxchg as disabling interrupts will not work for NMIs (which is the work around for i386s that do not have a true cmpxchg). This 'slashdot' character also suggested a fix to the issue. As the state of the nesting NMIs goes as follows: NOT_RUNNING -> EXECUTING EXECUTING -> NOT_RUNNING EXECUTING -> LATCHED LATCHED -> EXECUTING Having these states as enum values of: NOT_RUNNING = 0 EXECUTING = 1 LATCHED = 2 Instead of a cmpxchg to make EXECUTING -> NOT_RUNNING a dec_and_test() would work as well. If the dec_and_test brings the state to NOT_RUNNING, that is the same as a cmpxchg succeeding to change EXECUTING to NOT_RUNNING. If a nested NMI were to come in and change it to LATCHED, the dec_and_test() would convert the state to EXECUTING (what we want it to be in such a case anyway). I asked 'slashdot' to post this as a patch, but it never came to be. I decided to do the work instead. Thanks to H. Peter Anvin for suggesting to use this_cpu_dec_and_return() instead of local_dec_and_test(&__get_cpu_var()). Link: http://lwn.net/Articles/484932/ Cc: H. Peter Anvin Signed-off-by: Steven Rostedt --- arch/x86/kernel/nmi.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index a0b2f84457be..a15a88800661 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) #ifdef CONFIG_X86_32 /* * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C. Simply have 3 states - * the NMI can be in. + * add a workaround to the iret problem in C (preventing nested + * NMIs if an NMI takes a trap). Simply have 3 states the NMI + * can be in: * * 1) not running * 2) executing @@ -383,13 +384,20 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) * If an NMI hits a breakpoint that executes an iret, another * NMI can preempt it. We do not want to allow this new NMI * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the first NMI will perform - * an cmpxchg on the state, and if it doesn't successfully - * reset the state to "not running" it will restart the next - * NMI. + * We set the state to "latched", and the exit of the first NMI will + * perform a dec_return, if the result is zero (NOT_RUNNING), then + * it will simply exit the NMI handler. If not, the dec_return + * would have set the state to NMI_EXECUTING (what we want it to + * be when we are running). In this case, we simply jump back + * to rerun the NMI handler again, and restart the 'latched' NMI. + * + * No trap (breakpoint or page fault) should be hit before nmi_restart, + * thus there is no race between the first check of state for NOT_RUNNING + * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs + * at this point. */ enum nmi_states { - NMI_NOT_RUNNING, + NMI_NOT_RUNNING = 0, NMI_EXECUTING, NMI_LATCHED, }; @@ -397,18 +405,17 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state); #define nmi_nesting_preprocess(regs) \ do { \ - if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ - __get_cpu_var(nmi_state) = NMI_LATCHED; \ + if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ + this_cpu_write(nmi_state, NMI_LATCHED); \ return; \ } \ - nmi_restart: \ - __get_cpu_var(nmi_state) = NMI_EXECUTING; \ - } while (0) + this_cpu_write(nmi_state, NMI_EXECUTING); \ + } while (0); \ + nmi_restart: #define nmi_nesting_postprocess() \ do { \ - if (cmpxchg(&__get_cpu_var(nmi_state), \ - NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ + if (this_cpu_dec_return(nmi_state)) \ goto nmi_restart; \ } while (0) #else /* x86_64 */ -- cgit v1.2.3 From 70fb74a5420f9caa3e001d65004e4b669124283e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 7 Jun 2012 11:54:37 -0400 Subject: x86: Save cr2 in NMI in case NMIs take a page fault (for i386) Avi Kivity reported that page faults in NMIs could cause havic if the NMI preempted another page fault handler: The recent changes to NMI allow exceptions to take place in NMI handlers, but I think that a #PF (say, due to access to vmalloc space) is still problematic. Consider the sequence #PF (cr2 set by processor) NMI ... #PF (cr2 clobbered) do_page_fault() IRET ... IRET do_page_fault() address = read_cr2() The last line reads the overwritten cr2 value. This is the i386 version, which has the luxury of doing the work in C code. Link: http://lkml.kernel.org/r/4FBB8C40.6080304@redhat.com Reported-by: Avi Kivity Cc: Linus Torvalds Cc: H. Peter Anvin Cc: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/kernel/nmi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index a15a88800661..f84f5c57de35 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -395,6 +395,14 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) * thus there is no race between the first check of state for NOT_RUNNING * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs * at this point. + * + * In case the NMI takes a page fault, we need to save off the CR2 + * because the NMI could have preempted another page fault and corrupt + * the CR2 that is about to be read. As nested NMIs must be restarted + * and they can not take breakpoints or page faults, the update of the + * CR2 must be done before converting the nmi state back to NOT_RUNNING. + * Otherwise, there would be a race of another nested NMI coming in + * after setting state to NOT_RUNNING but before updating the nmi_cr2. */ enum nmi_states { NMI_NOT_RUNNING = 0, @@ -402,6 +410,7 @@ enum nmi_states { NMI_LATCHED, }; static DEFINE_PER_CPU(enum nmi_states, nmi_state); +static DEFINE_PER_CPU(unsigned long, nmi_cr2); #define nmi_nesting_preprocess(regs) \ do { \ @@ -410,11 +419,14 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state); return; \ } \ this_cpu_write(nmi_state, NMI_EXECUTING); \ + this_cpu_write(nmi_cr2, read_cr2()); \ } while (0); \ nmi_restart: #define nmi_nesting_postprocess() \ do { \ + if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ + write_cr2(this_cpu_read(nmi_cr2)); \ if (this_cpu_dec_return(nmi_state)) \ goto nmi_restart; \ } while (0) -- cgit v1.2.3 From e9071b0be5e7ce4903b7f7c370769d485774d3e3 Mon Sep 17 00:00:00 2001 From: Donald Dutile Date: Fri, 8 Jun 2012 17:13:11 -0400 Subject: iommu/dmar: Use pr_format() instead of PREFIX to tidy up pr_*() calls Joe Perches recommended getting rid of the redundant formatting of adding "PREFIX" to all the uses of pr_*() calls. The recommendation helps to reduce source and improve readibility. While cleaning up the PREFIX's, I saw that one of the pr_warn() was redundant in dmar_parse_one_dev_scope(), since the same message was printed after breaking out of the while loop for the same condition, !pdev. So, to avoid a duplicate message, I removed the one in the while loop. Reported-by: Joe Perches Signed-off-by: Donald Dutile Cc: iommu@lists.linux-foundation.org Cc: chrisw@redhat.com Cc: suresh.b.siddha@intel.com Cc: dwmw2@infradead.org Link: http://lkml.kernel.org/r/1339189991-13129-1-git-send-email-ddutile@redhat.com [ Small whitespace fixes. ] Signed-off-by: Ingo Molnar --- drivers/iommu/dmar.c | 54 +++++++++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9ab6ebf46f7a..86e2f4a62b9a 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -26,6 +26,8 @@ * These routines are used by both DMA-remapping and Interrupt-remapping */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */ + #include #include #include @@ -39,8 +41,6 @@ #include #include -#define PREFIX "DMAR: " - /* No locks are needed as DMA remapping hardware unit * list is constructed at boot time and hotplug of * these units are not supported by the architecture. @@ -83,15 +83,12 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, * ignore it */ if (!bus) { - pr_warn(PREFIX "Device scope bus [%d] not found\n", - scope->bus); + pr_warn("Device scope bus [%d] not found\n", scope->bus); break; } pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn)); if (!pdev) { - pr_warn(PREFIX "Device scope device" - "[%04x:%02x:%02x.%02x] not found\n", - segment, bus->number, path->dev, path->fn); + /* warning will be printed below */ break; } path ++; @@ -99,8 +96,7 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, bus = pdev->subordinate; } if (!pdev) { - pr_warn(PREFIX - "Device scope device [%04x:%02x:%02x.%02x] not found\n", + pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n", segment, scope->bus, path->dev, path->fn); *dev = NULL; return 0; @@ -109,8 +105,8 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, pdev->subordinate) || (scope->entry_type == \ ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) { pci_dev_put(pdev); - pr_warn(PREFIX "Device scope type does not match for %s\n", - pci_name(pdev)); + pr_warn("Device scope type does not match for %s\n", + pci_name(pdev)); return -EINVAL; } *dev = pdev; @@ -132,7 +128,7 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) (*cnt)++; else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) { - pr_warn(PREFIX "Unsupported device scope\n"); + pr_warn("Unsupported device scope\n"); } start += scope->length; } @@ -258,23 +254,23 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) case ACPI_DMAR_TYPE_HARDWARE_UNIT: drhd = container_of(header, struct acpi_dmar_hardware_unit, header); - pr_info(PREFIX "DRHD base: %#016Lx flags: %#x\n", + pr_info("DRHD base: %#016Lx flags: %#x\n", (unsigned long long)drhd->address, drhd->flags); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: rmrr = container_of(header, struct acpi_dmar_reserved_memory, header); - pr_info(PREFIX "RMRR base: %#016Lx end: %#016Lx\n", + pr_info("RMRR base: %#016Lx end: %#016Lx\n", (unsigned long long)rmrr->base_address, (unsigned long long)rmrr->end_address); break; case ACPI_DMAR_TYPE_ATSR: atsr = container_of(header, struct acpi_dmar_atsr, header); - pr_info(PREFIX "ATSR flags: %#x\n", atsr->flags); + pr_info("ATSR flags: %#x\n", atsr->flags); break; case ACPI_DMAR_HARDWARE_AFFINITY: rhsa = container_of(header, struct acpi_dmar_rhsa, header); - pr_info(PREFIX "RHSA base: %#016Lx proximity domain: %#x\n", + pr_info("RHSA base: %#016Lx proximity domain: %#x\n", (unsigned long long)rhsa->base_address, rhsa->proximity_domain); break; @@ -294,7 +290,7 @@ static int __init dmar_table_detect(void) &dmar_tbl_size); if (ACPI_SUCCESS(status) && !dmar_tbl) { - pr_warn(PREFIX "Unable to map DMAR\n"); + pr_warn("Unable to map DMAR\n"); status = AE_NOT_FOUND; } @@ -328,18 +324,18 @@ parse_dmar_table(void) return -ENODEV; if (dmar->width < PAGE_SHIFT - 1) { - pr_warn(PREFIX "Invalid DMAR haw\n"); + pr_warn("Invalid DMAR haw\n"); return -EINVAL; } - pr_info(PREFIX "Host address width %d\n", dmar->width + 1); + pr_info("Host address width %d\n", dmar->width + 1); entry_header = (struct acpi_dmar_header *)(dmar + 1); while (((unsigned long)entry_header) < (((unsigned long)dmar) + dmar_tbl->length)) { /* Avoid looping forever on bad ACPI tables */ if (entry_header->length == 0) { - pr_warn(PREFIX "Invalid 0-length structure\n"); + pr_warn("Invalid 0-length structure\n"); ret = -EINVAL; break; } @@ -362,7 +358,7 @@ parse_dmar_table(void) #endif break; default: - pr_warn(PREFIX "Unknown DMAR structure type %d\n", + pr_warn("Unknown DMAR structure type %d\n", entry_header->type); ret = 0; /* for forward compatibility */ break; @@ -461,12 +457,12 @@ int __init dmar_table_init(void) ret = parse_dmar_table(); if (ret) { if (ret != -ENODEV) - pr_info(PREFIX "parse DMAR table failure.\n"); + pr_info("parse DMAR table failure.\n"); return ret; } if (list_empty(&dmar_drhd_units)) { - pr_info(PREFIX "No DMAR devices found\n"); + pr_info("No DMAR devices found\n"); return -ENODEV; } @@ -498,7 +494,7 @@ int __init check_zero_address(void) (((unsigned long)dmar) + dmar_tbl->length)) { /* Avoid looping forever on bad ACPI tables */ if (entry_header->length == 0) { - pr_warn(PREFIX "Invalid 0-length structure\n"); + pr_warn("Invalid 0-length structure\n"); return 0; } @@ -549,8 +545,7 @@ int __init detect_intel_iommu(void) if (ret && irq_remapping_enabled && cpu_has_x2apic && dmar->flags & 0x1) - pr_info("Queued invalidation will be enabled to " - "support x2apic and Intr-remapping.\n"); + pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); if (ret && !no_iommu && !iommu_detected && !dmar_disabled) { iommu_detected = 1; @@ -580,9 +575,9 @@ static void unmap_iommu(struct intel_iommu *iommu) * map_iommu: map the iommu's registers * @iommu: the iommu to map * @phys_addr: the physical address of the base resgister - * + * * Memory map the iommu's registers. Start w/ a single page, and - * possibly expand if that turns out to be insufficent. + * possibly expand if that turns out to be insufficent. */ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) { @@ -1302,8 +1297,7 @@ int __init enable_drhd_fault_handling(void) ret = dmar_set_interrupt(iommu); if (ret) { - pr_err("DRHD %Lx: failed to enable fault, " - " interrupt, ret %d\n", + pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n", (unsigned long long)drhd->reg_base_addr, ret); return -1; } -- cgit v1.2.3 From e2b297fcf17fc03734e93387fb8195c782286b35 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Sun, 10 Jun 2012 21:13:41 -0600 Subject: perf/x86: Convert obsolete simple_strtoul() usage to kstrtoul() Signed-off-by: Shuah Khan Cc: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1339384421.3025.8.camel@lorien2 Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 000a4746c7ce..766c76d5ec4c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1640,7 +1640,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev, struct device_attribute *attr, const char *buf, size_t count) { - unsigned long val = simple_strtoul(buf, NULL, 0); + unsigned long val; + ssize_t ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; if (!!val != !!x86_pmu.attr_rdpmc) { x86_pmu.attr_rdpmc = !!val; -- cgit v1.2.3 From 110c1e1f1bf61e5dca53ff5c9dc75243ce87c002 Mon Sep 17 00:00:00 2001 From: Ravikiran Thirumalai Date: Sun, 3 Jun 2012 01:11:35 +0300 Subject: x86/vsmp: Ignore IOAPIC IRQ affinity if possible vSMP can route interrupts more optimally based on internal knowledge the OS does not have. In order to support this optimization, all CPUs must be able to handle all possible IOAPIC interrupts. Fix this by setting the vector allocation domain for all CPUs and by enabling this feature in vSMP. Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim [ Rebased, simplified, and reworded the commit message. ] Signed-off-by: Ido Yariv Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 59eea855f451..6b96a7374f94 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -95,6 +96,15 @@ static void __init set_vsmp_pv_ops(void) ctl = readl(address + 4); printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", cap, ctl); + + /* If possible, let the vSMP foundation route the interrupt optimally */ +#ifdef CONFIG_SMP + if (cap & ctl & BIT(8)) { + ctl &= ~BIT(8); + no_irq_affinity = 1; + } +#endif + if (cap & ctl & (1 << 4)) { /* Setup irq ops and turn on vSMP IRQ fastpath handling */ pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); @@ -102,12 +112,11 @@ static void __init set_vsmp_pv_ops(void) pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); pv_init_ops.patch = vsmp_patch; - ctl &= ~(1 << 4); - writel(ctl, address + 4); - ctl = readl(address + 4); - printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl); } + writel(ctl, address + 4); + ctl = readl(address + 4); + pr_info("vSMP CTL: control set to:0x%08x\n", ctl); early_iounmap(address, 8); } @@ -192,10 +201,20 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) return hard_smp_processor_id() >> index_msb; } +/* + * In vSMP, all cpus should be capable of handling interrupts, regardless of + * the APIC used. + */ +static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + cpumask_setall(retmask); +} + static void vsmp_apic_post_init(void) { /* need to update phys_pkg_id */ apic->phys_pkg_id = apicid_phys_pkg_id; + apic->vector_allocation_domain = fill_vector_allocation_domain; } void __init vsmp_init(void) -- cgit v1.2.3 From b871a42b6091b720e82ddff237659534c525c25b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Jun 2012 15:07:08 +0200 Subject: smpboot: Remove leftover declaration Signed-off-by: Thomas Gleixner --- kernel/smpboot.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/smpboot.h b/kernel/smpboot.h index 80c0acfb8472..6ef9433e1c70 100644 --- a/kernel/smpboot.h +++ b/kernel/smpboot.h @@ -3,8 +3,6 @@ struct task_struct; -int smpboot_prepare(unsigned int cpu); - #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD struct task_struct *idle_thread_get(unsigned int cpu); void idle_thread_set_boot_cpu(void); -- cgit v1.2.3 From ed88bed881c9948c4035828c5d63f60c7b015f86 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 Jun 2012 19:26:33 +0300 Subject: x86/apic/irq_remap: Silence a bogus pr_err() There is an extra semicolon here so the pr_err() message is printed when it is not intended. Signed-off-by: Dan Carpenter Acked-by: Yinghai Lu Cc: Alexander Gordeev Cc: Suresh Siddha Cc: Joerg Roedel Link: http://lkml.kernel.org/r/20120612162633.GA11077@elgon.mountain Signed-off-by: Ingo Molnar --- drivers/iommu/intel_irq_remapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index dafbad06390a..853902a1b7db 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -938,7 +938,7 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); if (err) { - if (assign_irq_vector(irq, cfg, data->affinity)); + if (assign_irq_vector(irq, cfg, data->affinity)) pr_err("Failed to recover vector for irq %d\n", irq); return err; } -- cgit v1.2.3 From 2f74759056797054122cdc70844137f70bb3f626 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 7 Jun 2012 22:20:18 +0900 Subject: x86/alternatives: Use atomic_xchg() instead atomic_dec_and_test() for stop_machine_text_poke() stop_machine_text_poke() uses atomic_dec_and_test() to select one of the CPUs executing that function to actually modify the code. Since the variable is initialized to 1, subsequent CPUs will make the variable go negative. Since going negative is uncommon/unexpected in typical dec_and_test usage change this user to atomic_xchg(). This was found using a patch that warns on dec_and_test going negative. Signed-off-by: OGAWA Hirofumi Acked-by: Steven Rostedt [ Rewrote changelog ] Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/87zk8fgsx9.fsf@devron.myhome.or.jp Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1f84794f0759..53231a045d3d 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -664,7 +664,7 @@ static int __kprobes stop_machine_text_poke(void *data) struct text_poke_param *p; int i; - if (atomic_dec_and_test(&stop_machine_first)) { + if (atomic_xchg(&stop_machine_first, 0)) { for (i = 0; i < tpp->nparams; i++) { p = &tpp->params[i]; text_poke(p->addr, p->opcode, p->len); -- cgit v1.2.3 From cac4afbc3da58d9e5701b34bd4c1f11ea13328d4 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 14 Jun 2012 12:39:34 +0200 Subject: x86/x2apic/cluster: Vector_allocation_domain() should return a value Since commit 8637e38 ("x86/apic: Avoid useless scanning thru a cpumask in assign_irq_vector()") vector_allocation_domain() operation indicates if a cpumask is dynamic or static. This update fixes the oversight and makes the operation to return a value. Signed-off-by: Alexander Gordeev Cc: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120614103933.GJ3383@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_cluster.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 23a46cf5b6fd..1885a73b7f33 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -228,10 +228,11 @@ static int x2apic_cluster_probe(void) /* * Each x2apic cluster is an allocation domain. */ -static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask) +static bool cluster_vector_allocation_domain(int cpu, struct cpumask *retmask) { cpumask_clear(retmask); cpumask_copy(retmask, per_cpu(cpus_in_cluster, cpu)); + return true; } static struct apic apic_x2apic_cluster = { -- cgit v1.2.3 From a5a391561bc25898ba1a702a0c4b028aa5b11ce9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 14 Jun 2012 09:49:35 +0200 Subject: x86/apic: Eliminate cpu_mask_to_apicid() operation Since there are only two locations where cpu_mask_to_apicid() is called from, remove the operation and use only cpu_mask_to_apicid_and() instead. Signed-off-by: Alexander Gordeev Suggested-and-acked-by: Suresh Siddha Acked-by: Yinghai Lu Link: http://lkml.kernel.org/r/20120614074935.GE3383@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 33 ++++++++------------------------- arch/x86/kernel/apic/apic.c | 24 ++++++------------------ arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 1 - arch/x86/kernel/apic/apic_numachip.c | 1 - arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/es7000_32.c | 4 +--- arch/x86/kernel/apic/io_apic.c | 3 ++- arch/x86/kernel/apic/numaq_32.c | 8 -------- arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/apic/summit_32.c | 3 +-- arch/x86/kernel/apic/x2apic_cluster.c | 17 ----------------- arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 29 ++++++----------------------- arch/x86/platform/uv/uv_irq.c | 2 +- 15 files changed, 25 insertions(+), 105 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1ed3eead2039..eec240e12091 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -331,8 +331,6 @@ struct apic { unsigned long (*set_apic_id)(unsigned int id); unsigned long apic_id_mask; - int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, - unsigned int *apicid); int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, const struct cpumask *andmask, unsigned int *apicid); @@ -594,9 +592,15 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) #endif static inline int -__flat_cpu_mask_to_apicid(unsigned long cpu_mask, unsigned int *apicid) +flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid) { - cpu_mask = cpu_mask & APIC_ALL_CPUS & cpumask_bits(cpu_online_mask)[0]; + unsigned long cpu_mask = cpumask_bits(cpumask)[0] & + cpumask_bits(andmask)[0] & + cpumask_bits(cpu_online_mask)[0] & + APIC_ALL_CPUS; + if (likely(cpu_mask)) { *apicid = (unsigned int)cpu_mask; return 0; @@ -605,27 +609,6 @@ __flat_cpu_mask_to_apicid(unsigned long cpu_mask, unsigned int *apicid) } } -static inline int -flat_cpu_mask_to_apicid(const struct cpumask *cpumask, - unsigned int *apicid) -{ - return __flat_cpu_mask_to_apicid(cpumask_bits(cpumask)[0], apicid); -} - -static inline int -flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid) -{ - unsigned long mask1 = cpumask_bits(cpumask)[0]; - unsigned long mask2 = cpumask_bits(andmask)[0]; - return __flat_cpu_mask_to_apicid(mask1 & mask2, apicid); -} - -extern int -default_cpu_mask_to_apicid(const struct cpumask *cpumask, - unsigned int *apicid); - extern int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask, diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7e9bbe73bc5a..048a4f806d46 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2123,23 +2123,6 @@ void default_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline int __default_cpu_to_apicid(int cpu, unsigned int *apicid) -{ - if (likely((unsigned int)cpu < nr_cpu_ids)) { - *apicid = per_cpu(x86_cpu_to_apicid, cpu); - return 0; - } else { - return -EINVAL; - } -} - -int default_cpu_mask_to_apicid(const struct cpumask *cpumask, - unsigned int *apicid) -{ - int cpu = cpumask_first_and(cpumask, cpu_online_mask); - return __default_cpu_to_apicid(cpu, apicid); -} - int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask, unsigned int *apicid) @@ -2151,7 +2134,12 @@ int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, break; } - return __default_cpu_to_apicid(cpu, apicid); + if (likely((unsigned int)cpu < nr_cpu_ids)) { + *apicid = per_cpu(x86_cpu_to_apicid, cpu); + return 0; + } else { + return -EINVAL; + } } /* diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index bddc92566d0a..00c77cf78e9e 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -191,7 +191,6 @@ static struct apic apic_flat = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = flat_send_IPI_mask, @@ -308,7 +307,6 @@ static struct apic apic_physflat = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = physflat_send_IPI_mask, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index ac9edf247b15..65c07fc630a1 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -159,7 +159,6 @@ struct apic apic_noop = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = noop_send_IPI_mask, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index c028132ad358..bc552cff2578 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -234,7 +234,6 @@ static struct apic apic_numachip __refconst = { .set_apic_id = set_apic_id, .apic_id_mask = 0xffU << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = numachip_send_IPI_mask, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index df342fe4d6aa..d50e3640d5ae 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -188,7 +188,6 @@ static struct apic apic_bigsmp = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = bigsmp_send_IPI_mask, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index b35cfb9b6962..2c5317ea1b83 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -525,7 +525,7 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid) return 1; } -static int +static inline int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) { unsigned int round = 0; @@ -643,7 +643,6 @@ static struct apic __refdata apic_es7000_cluster = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, .send_IPI_mask = es7000_send_IPI_mask, @@ -710,7 +709,6 @@ static struct apic __refdata apic_es7000 = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, .send_IPI_mask = es7000_send_IPI_mask, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 0deb773404e5..0540f083f452 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1492,7 +1492,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, * We use logical delivery to get the timer IRQ * to the first CPU. */ - if (unlikely(apic->cpu_mask_to_apicid(apic->target_cpus(), &dest))) + if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(), + apic->target_cpus(), &dest))) dest = BAD_APICID; entry.dest_mode = apic->irq_dest_mode; diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 2b55514c328b..d661ee95cabf 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -406,13 +406,6 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static int -numaq_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid) -{ - *apicid = 0x0F; - return 0; -} - static int numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask, @@ -499,7 +492,6 @@ static struct apic __refdata apic_numaq = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, .send_IPI_mask = numaq_send_IPI_mask, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 2c6f003b2e4b..eef6bcd1bf1e 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -108,7 +108,6 @@ static struct apic apic_default = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = default_send_IPI_mask_logical, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 79d360f6729e..bbad180f2890 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -263,7 +263,7 @@ static int summit_check_phys_apicid_present(int physical_apicid) return 1; } -static int +static inline int summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) { unsigned int round = 0; @@ -516,7 +516,6 @@ static struct apic apic_summit = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, .send_IPI_mask = summit_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 1885a73b7f33..943d03fc6fc4 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -96,22 +96,6 @@ static void x2apic_send_IPI_all(int vector) __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static int -x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid) -{ - int cpu = cpumask_first_and(cpumask, cpu_online_mask); - int i; - - if (cpu >= nr_cpu_ids) - return -EINVAL; - - *apicid = 0; - for_each_cpu_and(i, cpumask, per_cpu(cpus_in_cluster, cpu)) - *apicid |= per_cpu(x86_cpu_to_logical_apicid, i); - - return 0; -} - static int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask, @@ -270,7 +254,6 @@ static struct apic apic_x2apic_cluster = { .set_apic_id = x2apic_set_apic_id, .apic_id_mask = 0xFFFFFFFFu, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index f109388a0e80..e03a1e180e81 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -123,7 +123,6 @@ static struct apic apic_x2apic_phys = { .set_apic_id = x2apic_set_apic_id, .apic_id_mask = 0xFFFFFFFFu, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 307aa076bd62..026de0114d15 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -269,27 +269,6 @@ static void uv_init_apic_ldr(void) { } -static inline int __uv_cpu_to_apicid(int cpu, unsigned int *apicid) -{ - if (likely((unsigned int)cpu < nr_cpu_ids)) { - *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; - return 0; - } else { - return -EINVAL; - } -} - -static int -uv_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *apicid) -{ - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first_and(cpumask, cpu_online_mask); - return __uv_cpu_to_apicid(cpu, apicid); -} - static int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask, @@ -306,7 +285,12 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, break; } - return __uv_cpu_to_apicid(cpu, apicid); + if (likely((unsigned int)cpu < nr_cpu_ids)) { + *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; + return 0; + } else { + return -EINVAL; + } } static unsigned int x2apic_get_apic_id(unsigned long x) @@ -384,7 +368,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, .send_IPI_mask = uv_send_IPI_mask, diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index dd1ff39a464c..a67c7a6bac7e 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -144,7 +144,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, if (err != 0) return err; - err = apic->cpu_mask_to_apicid(eligible_cpu, &dest); + err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest); if (err != 0) return err; -- cgit v1.2.3 From ea3807ea52a53f2cdfd60c89d8491fc9a8208d1c Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 14 Jun 2012 09:49:55 +0200 Subject: x86/apic: Fix ugly casting and branching in cpu_mask_to_apicid_and() Signed-off-by: Alexander Gordeev Cc: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120614074954.GF3383@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 8 ++++---- arch/x86/kernel/apic/es7000_32.c | 2 +- arch/x86/kernel/apic/summit_32.c | 2 +- arch/x86/kernel/apic/x2apic_uv_x.c | 8 ++++---- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 048a4f806d46..c421512ca5eb 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2127,19 +2127,19 @@ int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask, unsigned int *apicid) { - int cpu; + unsigned int cpu; for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - if (likely((unsigned int)cpu < nr_cpu_ids)) { + if (likely(cpu < nr_cpu_ids)) { *apicid = per_cpu(x86_cpu_to_apicid, cpu); return 0; - } else { - return -EINVAL; } + + return -EINVAL; } /* diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 2c5317ea1b83..effece2ea0db 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -529,7 +529,7 @@ static inline int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) { unsigned int round = 0; - int cpu, uninitialized_var(apicid); + unsigned int cpu, uninitialized_var(apicid); /* * The cpus in the mask must all be on the apic cluster. diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index bbad180f2890..b53fd6c9993a 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -267,7 +267,7 @@ static inline int summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) { unsigned int round = 0; - int cpu, apicid = 0; + unsigned int cpu, apicid = 0; /* * The cpus in the mask must all be on the apic cluster. diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 026de0114d15..8cfade9510a4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -274,7 +274,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask, unsigned int *apicid) { - int cpu; + int unsigned cpu; /* * We're using fixed IRQ delivery, can only return one phys APIC ID. @@ -285,12 +285,12 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, break; } - if (likely((unsigned int)cpu < nr_cpu_ids)) { + if (likely(cpu < nr_cpu_ids)) { *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; return 0; - } else { - return -EINVAL; } + + return -EINVAL; } static unsigned int x2apic_get_apic_id(unsigned long x) -- cgit v1.2.3 From 49ad3fd4834182cce9725abb98e080b479fed464 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 14 Jun 2012 09:50:11 +0200 Subject: x86/apic/es7000+summit: Fix compile warning in cpu_mask_to_apicid() Signed-off-by: Alexander Gordeev Cc: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120614075010.GG3383@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 2 +- arch/x86/kernel/apic/summit_32.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index effece2ea0db..0c1347df3ad0 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -554,8 +554,8 @@ es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask, unsigned int *apicid) { - *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); cpumask_var_t cpumask; + *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) return 0; diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index b53fd6c9993a..e6cc1829f7c8 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -291,8 +291,8 @@ summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask, unsigned int *apicid) { - *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); cpumask_var_t cpumask; + *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) return 0; -- cgit v1.2.3 From 214e270b5f5f6a85400a817d5305c797b2b7467a Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 14 Jun 2012 09:50:27 +0200 Subject: x86/apic/es7000+summit: Always make valid apicid from a cpumask In case of invalid parameters cpu_mask_to_apicid_and() might return apicid value of 0 (on Summit) or a uninitialized value (on ES7000), although it is supposed to return apicid of cpu-0 at least. Fix the operation to always return a valid apicid. Signed-off-by: Alexander Gordeev Cc: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120614075026.GH3383@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 2 ++ arch/x86/kernel/apic/summit_32.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 0c1347df3ad0..9882093f26e8 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -545,6 +545,8 @@ es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) apicid = new_apicid; round++; } + if (!round) + return -EINVAL; *dest_id = apicid; return 0; } diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index e6cc1829f7c8..b6e61857c29f 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -282,6 +282,8 @@ summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) apicid |= new_apicid; round++; } + if (!round) + return -EINVAL; *dest_id = apicid; return 0; } -- cgit v1.2.3 From 5a0a2a308113086cc800a203d903271c9caa1611 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 14 Jun 2012 09:50:44 +0200 Subject: x86/apic/es7000: Make apicid of a cluster (not CPU) from a cpumask cpu_mask_to_apicid_and() always returns apicid of a single CPU, even in case multiple CPUs were requested. This update fixes a typo and forces apicid of a cluster to be returned. Signed-off-by: Alexander Gordeev Cc: Suresh Siddha Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20120614075043.GI3383@dhcp-26-207.brq.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 9882093f26e8..0874799a98c6 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -542,7 +542,7 @@ es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) return -EINVAL; } - apicid = new_apicid; + apicid |= new_apicid; round++; } if (!round) -- cgit v1.2.3 From 8d240dd88cca33b704adf3fe281aa64b5aac2dd8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 29 Mar 2012 19:11:40 +0200 Subject: ftrace: Remove a superfluous check register_ftrace_function() checks ftrace_disabled and calls __register_ftrace_function which does it again. Drop the first check and add the unlikely hint to the second one. Also, drop the label as John correctly notices. No functional change. Link: http://lkml.kernel.org/r/20120329171140.GE6409@aftab Cc: Borislav Petkov Cc: John Kacur Signed-off-by: Borislav Petkov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a008663d86c8..b4f20fba09fc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -312,7 +312,7 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list, static int __register_ftrace_function(struct ftrace_ops *ops) { - if (ftrace_disabled) + if (unlikely(ftrace_disabled)) return -ENODEV; if (FTRACE_WARN_ON(ops == &global_ops)) @@ -4299,16 +4299,12 @@ int register_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_lock); - if (unlikely(ftrace_disabled)) - goto out_unlock; - ret = __register_ftrace_function(ops); if (!ret) ret = ftrace_startup(ops, 0); - - out_unlock: mutex_unlock(&ftrace_lock); + return ret; } EXPORT_SYMBOL_GPL(register_ftrace_function); -- cgit v1.2.3 From 5da43bed800770906fca24deef7ae3d456823b86 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 31 May 2012 21:28:58 -0400 Subject: tracing: Add comments for the other bits of ftrace_event_call.flags TRACE_EVENT_FL_ENABLED_BIT, TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_RECORDED_CMD_BIT, Have comments about what they are, but: TRACE_EVENT_FL_CAP_ANY_BIT, TRACE_EVENT_FL_NO_SET_FILTER_BIT, TRACE_EVENT_FL_IGNORE_ENABLE_BIT, do not, making them second class citizens. To prevent another class warfare, these bits have protested for their right to be commented. And By Golly! I'll give them what they want! Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 176a939d1547..1aff18346c71 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -207,6 +207,9 @@ struct ftrace_event_call { * bit 1: enabled * bit 2: filter_active * bit 3: enabled cmd record + * bit 4: allow trace by non root (cap any) + * bit 5: failed to apply filter + * bit 6: ftrace internal event (do not enable) * * Changes to flags must hold the event_mutex. * -- cgit v1.2.3 From 7374e82771c6d5a9af2080be46f64a5826c7efb1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 31 May 2012 21:40:05 -0400 Subject: tracing: Register the ftrace internal events during early boot All trace events including ftrace internel events (like trace_printk and function tracing), register functions that describe how to print their output. The events may be recorded as soon as the ring buffer is allocated, but they are just raw binary in the buffer. The mapping of event ids to how to print them are held within a structure that is registered on system boot. If a crash happens in boot up before these functions are registered then their output (via ftrace_dump_on_oops) will be useless: Dumping ftrace buffer: --------------------------------- <...>-1 0.... 319705us : Unknown type 6 --------------------------------- This can be quite frustrating for a kernel developer trying to see what is going wrong. There's no reason to register them so late in the boot up process. They can be registered by early_initcall(). Reported-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- kernel/trace/trace_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index df611a0e76c5..123b189c732c 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1325,4 +1325,4 @@ __init static int init_events(void) return 0; } -device_initcall(init_events); +early_initcall(init_events); -- cgit v1.2.3 From d48daf37a3d2e2b28a61e615c0fc538301edb0dd Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Thu, 14 Jun 2012 18:43:08 +0300 Subject: x86/vsmp: Fix linker error when CONFIG_PROC_FS is not set set_vsmp_pv_ops() references no_irq_affinity which is undeclared if CONFIG_PROC_FS isn't set. Fix this by adding an #ifdef around this variable's access. Reported-by: Fengguang Wu Signed-off-by: Ido Yariv Acked-by: Shai Fultheim Link: http://lkml.kernel.org/r/1339688588-12674-1-git-send-email-ido@wizery.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 6b96a7374f94..3f0285ac00fa 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -101,7 +101,10 @@ static void __init set_vsmp_pv_ops(void) #ifdef CONFIG_SMP if (cap & ctl & BIT(8)) { ctl &= ~BIT(8); +#ifdef CONFIG_PROC_FS + /* Don't let users change irq affinity via procfs */ no_irq_affinity = 1; +#endif } #endif -- cgit v1.2.3 From 7eb9ae0799b1e9f0b77733b432bc5f6f055b020b Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 14 Jun 2012 18:28:49 -0700 Subject: irq/apic: Use config_enabled(CONFIG_SMP) checks to clean up irq_set_affinity() for UP Move the ->irq_set_affinity() routines out of the #ifdef CONFIG_SMP sections and use config_enabled(CONFIG_SMP) checks inside those routines. Thus making those routines simple null stubs for !CONFIG_SMP and retaining those routines with no additional runtime overhead for CONFIG_SMP kernels. Cleans up the ifdef CONFIG_SMP in and around routines related to irq_set_affinity in io_apic and irq_remapping subsystems. Signed-off-by: Suresh Siddha Cc: torvalds@linux-foundation.org Cc: joerg.roedel@amd.com Cc: Sam Ravnborg Cc: Paul Gortmaker Link: http://lkml.kernel.org/r/1339723729.3475.63.camel@sbsiddha-desk.sc.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 180 ++++++++++++++++-------------------- drivers/iommu/intel_irq_remapping.c | 7 +- drivers/iommu/irq_remapping.c | 5 +- drivers/iommu/irq_remapping.h | 2 - include/linux/irq.h | 2 - 5 files changed, 86 insertions(+), 110 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7cbd397884f5..a951ef7decb1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2224,81 +2224,6 @@ void send_cleanup_vector(struct irq_cfg *cfg) cfg->move_in_progress = 0; } -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - u8 vector = cfg->vector; - - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - - apic = entry->apic; - pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(cfg)) - io_apic_write(apic, 0x11 + pin*2, dest); - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - } -} - -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. - */ -int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int irq = data->irq; - int err; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; - - err = assign_irq_vector(irq, cfg, mask); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); - if (err) { - if (assign_irq_vector(irq, cfg, data->affinity)) - pr_err("Failed to recover vector for irq %d\n", irq); - return err; - } - - cpumask_copy(data->affinity, mask); - - return 0; -} - -static int -ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - unsigned int dest, irq = data->irq; - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - ret = __ioapic_set_affinity(data, mask, &dest); - if (!ret) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, data->chip_data); - ret = IRQ_SET_MASK_OK_NOCOPY; - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return ret; -} - asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -2386,6 +2311,87 @@ void irq_force_complete_move(int irq) static inline void irq_complete_move(struct irq_cfg *cfg) { } #endif +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + u8 vector = cfg->vector; + + for_each_irq_pin(entry, cfg->irq_2_pin) { + unsigned int reg; + + apic = entry->apic; + pin = entry->pin; + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(cfg)) + io_apic_write(apic, 0x11 + pin*2, dest); + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin*2, reg); + } +} + +/* + * Either sets data->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and + * leaves data->affinity untouched. + */ +int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, + unsigned int *dest_id) +{ + struct irq_cfg *cfg = data->chip_data; + unsigned int irq = data->irq; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -1; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + err = assign_irq_vector(irq, cfg, mask); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } + + cpumask_copy(data->affinity, mask); + + return 0; +} + +static int +ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + unsigned int dest, irq = data->irq; + unsigned long flags; + int ret; + + if (!config_enabled(CONFIG_SMP)) + return -1; + + raw_spin_lock_irqsave(&ioapic_lock, flags); + ret = __ioapic_set_affinity(data, mask, &dest); + if (!ret) { + /* Only the high 8 bits are valid. */ + dest = SET_APIC_LOGICAL_ID(dest); + __target_IO_APIC_irq(irq, dest, data->chip_data); + ret = IRQ_SET_MASK_OK_NOCOPY; + } + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + return ret; +} + static void ack_apic_edge(struct irq_data *data) { irq_complete_move(data->chip_data); @@ -2565,9 +2571,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip) chip->irq_ack = ir_ack_apic_edge; chip->irq_eoi = ir_ack_apic_level; -#ifdef CONFIG_SMP chip->irq_set_affinity = set_remapped_irq_affinity; -#endif } #endif /* CONFIG_IRQ_REMAP */ @@ -2578,9 +2582,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_unmask = unmask_ioapic_irq, .irq_ack = ack_apic_edge, .irq_eoi = ack_apic_level, -#ifdef CONFIG_SMP .irq_set_affinity = ioapic_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; @@ -3099,7 +3101,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, return err; } -#ifdef CONFIG_SMP static int msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -3121,7 +3122,6 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) return IRQ_SET_MASK_OK_NOCOPY; } -#endif /* CONFIG_SMP */ /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, @@ -3132,9 +3132,7 @@ static struct irq_chip msi_chip = { .irq_unmask = unmask_msi_irq, .irq_mask = mask_msi_irq, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = msi_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; @@ -3219,7 +3217,6 @@ void native_teardown_msi_irq(unsigned int irq) } #ifdef CONFIG_DMAR_TABLE -#ifdef CONFIG_SMP static int dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) @@ -3244,16 +3241,12 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, return IRQ_SET_MASK_OK_NOCOPY; } -#endif /* CONFIG_SMP */ - static struct irq_chip dmar_msi_type = { .name = "DMAR_MSI", .irq_unmask = dmar_msi_unmask, .irq_mask = dmar_msi_mask, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = dmar_msi_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; @@ -3274,7 +3267,6 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER -#ifdef CONFIG_SMP static int hpet_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -3297,16 +3289,12 @@ static int hpet_msi_set_affinity(struct irq_data *data, return IRQ_SET_MASK_OK_NOCOPY; } -#endif /* CONFIG_SMP */ - static struct irq_chip hpet_msi_type = { .name = "HPET_MSI", .irq_unmask = hpet_msi_unmask, .irq_mask = hpet_msi_mask, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = hpet_msi_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; @@ -3341,8 +3329,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) */ #ifdef CONFIG_HT_IRQ -#ifdef CONFIG_SMP - static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) { struct ht_irq_msg msg; @@ -3370,16 +3356,12 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) return IRQ_SET_MASK_OK_NOCOPY; } -#endif - static struct irq_chip ht_irq_chip = { .name = "PCI-HT", .irq_mask = mask_ht_irq, .irq_unmask = unmask_ht_irq, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = ht_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 853902a1b7db..e0b18f3ae9a8 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -902,7 +902,6 @@ static int intel_setup_ioapic_entry(int irq, return 0; } -#ifdef CONFIG_SMP /* * Migrate the IO-APIC irq in the presence of intr-remapping. * @@ -926,6 +925,9 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irte irte; int err; + if (!config_enabled(CONFIG_SMP)) + return -EINVAL; + if (!cpumask_intersects(mask, cpu_online_mask)) return -EINVAL; @@ -963,7 +965,6 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, cpumask_copy(data->affinity, mask); return 0; } -#endif static void intel_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, @@ -1065,9 +1066,7 @@ struct irq_remap_ops intel_irq_remap_ops = { .reenable = reenable_irq_remapping, .enable_faulting = enable_drhd_fault_handling, .setup_ioapic_entry = intel_setup_ioapic_entry, -#ifdef CONFIG_SMP .set_affinity = intel_ioapic_set_affinity, -#endif .free_irq = free_irte, .compose_msi_msg = intel_compose_msi_msg, .msi_alloc_irq = intel_msi_alloc_irq, diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 40cda8e98d87..1d29b1c66e72 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -111,16 +111,15 @@ int setup_ioapic_remapped_entry(int irq, vector, attr); } -#ifdef CONFIG_SMP int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - if (!remap_ops || !remap_ops->set_affinity) + if (!config_enabled(CONFIG_SMP) || !remap_ops || + !remap_ops->set_affinity) return 0; return remap_ops->set_affinity(data, mask, force); } -#endif void free_remapped_irq(int irq) { diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index be9d72950c51..b12974cc1dfe 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -59,11 +59,9 @@ struct irq_remap_ops { unsigned int, int, struct io_apic_irq_attr *); -#ifdef CONFIG_SMP /* Set the CPU affinity of a remapped interrupt */ int (*set_affinity)(struct irq_data *data, const struct cpumask *mask, bool force); -#endif /* Free an IRQ */ int (*free_irq)(int); diff --git a/include/linux/irq.h b/include/linux/irq.h index 61f5cec031e0..47a937cd84af 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -150,9 +150,7 @@ struct irq_data { void *handler_data; void *chip_data; struct msi_desc *msi_desc; -#ifdef CONFIG_SMP cpumask_var_t affinity; -#endif }; /* -- cgit v1.2.3 From ea131377148cdfe90641b42ae9aa5a6b3a4fa327 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:22 +0200 Subject: uprobes: Valid_vma() should reject VM_HUGETLB __replace_page() obviously can't work with the hugetlbfs mappings, uprobe_register() will likely crash the kernel. Change valid_vma() to check VM_HUGETLB as well. As for PageTransHuge() no need to worry, vma->vm_file != NULL. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120615154322.GA9561@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index b52376d02332..f0d04530af63 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -99,7 +99,8 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register) if (!is_register) return true; - if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC)) + if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) + == (VM_READ|VM_EXEC)) return true; return false; -- cgit v1.2.3 From cc359d180fa9c25a4c1819f17e07a422d788353d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:25 +0200 Subject: uprobes: __copy_insn() should ensure a_ops->readpage != NULL __copy_insn() blindly calls read_mapping_page(), this will crash the kernel if ->readpage == NULL, add the necessary check. For example, hugetlbfs_aops->readpage is NULL. Perhaps we should change read_mapping_page() instead. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120615154325.GA9568@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f0d04530af63..604930bf9c92 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -610,6 +610,9 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *ins if (!filp) return -EINVAL; + if (!mapping->a_ops->readpage) + return -EIO; + idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT); off1 = offset &= ~PAGE_MASK; -- cgit v1.2.3 From 5323ce71e4b4e1f188ebbc0cc7776885ea6c75fb Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:28 +0200 Subject: uprobes: Write_opcode()->__replace_page() can race with try_to_unmap() write_opcode() gets old_page via get_user_pages() and then calls __replace_page() which assumes that this old_page is still mapped after pte_offset_map_lock(). This is not true if this old_page was already try_to_unmap()'ed, and in this case everything __replace_page() does with old_page is wrong. Just for example, put_page() is not balanced. I think it is possible to teach __replace_page() to handle this unlikely case correctly, but this patch simply changes it to use page_check_address() and return -EAGAIN if it fails. The caller should notice this error code and retry. Note: write_opcode() asks for the cleanups, I'll try to do this in a separate patch. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120615154328.GA9571@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 604930bf9c92..3ccdb29ee8d6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -129,33 +129,17 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) { struct mm_struct *mm = vma->vm_mm; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep; - spinlock_t *ptl; unsigned long addr; - int err = -EFAULT; + spinlock_t *ptl; + pte_t *ptep; addr = page_address_in_vma(page, vma); if (addr == -EFAULT) - goto out; - - pgd = pgd_offset(mm, addr); - if (!pgd_present(*pgd)) - goto out; - - pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) - goto out; + return -EFAULT; - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - goto out; - - ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); + ptep = page_check_address(page, mm, addr, &ptl, 0); if (!ptep) - goto out; + return -EAGAIN; get_page(kpage); page_add_new_anon_rmap(kpage, vma, addr); @@ -174,10 +158,8 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct try_to_free_swap(page); put_page(page); pte_unmap_unlock(ptep, ptl); - err = 0; -out: - return err; + return 0; } /** @@ -222,9 +204,10 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, void *vaddr_old, *vaddr_new; struct vm_area_struct *vma; struct uprobe *uprobe; + unsigned long pgoff; loff_t addr; int ret; - +retry: /* Read the page with vaddr into memory */ ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); if (ret <= 0) @@ -269,9 +252,9 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, memcpy(vaddr_new, vaddr_old, PAGE_SIZE); /* poke the new insn in, ASSUMES we don't cross page boundary */ - vaddr &= ~PAGE_MASK; - BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); - memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); + pgoff = (vaddr & ~PAGE_MASK); + BUG_ON(pgoff + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); + memcpy(vaddr_new + pgoff, &opcode, UPROBE_SWBP_INSN_SIZE); kunmap_atomic(vaddr_new); kunmap_atomic(vaddr_old); @@ -291,6 +274,8 @@ unlock_out: put_out: put_page(old_page); + if (unlikely(ret == -EAGAIN)) + goto retry; return ret; } -- cgit v1.2.3 From c1914a0936f79ed0236f670122e06e36e4d332ee Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:31 +0200 Subject: uprobes: Install_breakpoint() should fail if is_swbp_insn() == T install_breakpoint() returns -EEXIST if is_swbp_insn(orig_insn) == T, the caller treats this code as success. This is doubly wrong. The successful return should set UPROBE_COPY_INSN, but the real problem is that it shouldn't succeed. If the probed insn is int3 the application should get SIGTRAP, this won't happen with uprobe. Probably we can fix this, we can add the UPROBE_SHARED_BP flag and teach handle_swbp/set_orig_insn to handle this case correctly. But this needs some complications and we have other insns which can't be probed, lets make a simple fix for now. I think this needs a cleanup. UPROBE_COPY_INSN should die, copy_insn() should be called by alloc_uprobe(). arch_uprobe_analyze_insn() depends on ->mm (ia32_compat) but it is called only once. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120615154331.GA9578@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 3ccdb29ee8d6..ec78152e32e9 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -693,7 +693,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, return ret; if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) - return -EEXIST; + return -ENOTSUPP; ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, addr); if (ret) -- cgit v1.2.3 From 268720903f87e0b84b161626c4447b81671b5d18 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:33 +0200 Subject: uprobes: Rework register_for_each_vma() to make it O(n) Currently register_for_each_vma() is O(n ** 2) + O(n ** 3), every time find_next_vma_info() "restarts" the vma_prio_tree_foreach() loop and each iteration rechecks the whole try_list. This also means that try_list can grow "indefinitely" if register/unregister races with munmap/mmap activity even if the number of mapping is bounded at any time. With this patch register_for_each_vma() builds the list of mm/vaddr structures only once and does install_breakpoint() for each entry. We do not care about the new mappings which can be created after build_map_info() drops mapping->i_mmap_mutex, uprobe_mmap() should do its work. Note that we do not allocate map_info under i_mmap_mutex, this can deadlock with page reclaim (but see the next patch). So we use 2 lists, "curr" which we are going to return, and "prev" which holds the already allocated memory. The main loop deques the entry from "prev" (initially it is empty), and if "prev" becomes empty again it counts the number of entries we need to pre-allocate outside of i_mmap_mutex. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Link: http://lkml.kernel.org/r/20120615154333.GA9581@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 199 +++++++++++++++++++++--------------------------- 1 file changed, 86 insertions(+), 113 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ec78152e32e9..4e0db3496d70 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -60,17 +60,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; */ static atomic_t uprobe_events = ATOMIC_INIT(0); -/* - * Maintain a temporary per vma info that can be used to search if a vma - * has already been handled. This structure is introduced since extending - * vm_area_struct wasnt recommended. - */ -struct vma_info { - struct list_head probe_list; - struct mm_struct *mm; - loff_t vaddr; -}; - struct uprobe { struct rb_node rb_node; /* node in the rb tree */ atomic_t ref; @@ -742,139 +731,123 @@ static void delete_uprobe(struct uprobe *uprobe) atomic_dec(&uprobe_events); } -static struct vma_info * -__find_next_vma_info(struct address_space *mapping, struct list_head *head, - struct vma_info *vi, loff_t offset, bool is_register) +struct map_info { + struct map_info *next; + struct mm_struct *mm; + loff_t vaddr; +}; + +static inline struct map_info *free_map_info(struct map_info *info) { + struct map_info *next = info->next; + kfree(info); + return next; +} + +static struct map_info * +build_map_info(struct address_space *mapping, loff_t offset, bool is_register) +{ + unsigned long pgoff = offset >> PAGE_SHIFT; struct prio_tree_iter iter; struct vm_area_struct *vma; - struct vma_info *tmpvi; - unsigned long pgoff; - int existing_vma; - loff_t vaddr; - - pgoff = offset >> PAGE_SHIFT; + struct map_info *curr = NULL; + struct map_info *prev = NULL; + struct map_info *info; + int more = 0; + again: + mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; - existing_vma = 0; - vaddr = vma_address(vma, offset); - - list_for_each_entry(tmpvi, head, probe_list) { - if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) { - existing_vma = 1; - break; - } - } - - /* - * Another vma needs a probe to be installed. However skip - * installing the probe if the vma is about to be unlinked. - */ - if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) { - vi->mm = vma->vm_mm; - vi->vaddr = vaddr; - list_add(&vi->probe_list, head); - - return vi; + if (!prev) { + more++; + continue; } - } - - return NULL; -} -/* - * Iterate in the rmap prio tree and find a vma where a probe has not - * yet been inserted. - */ -static struct vma_info * -find_next_vma_info(struct address_space *mapping, struct list_head *head, - loff_t offset, bool is_register) -{ - struct vma_info *vi, *retvi; + if (!atomic_inc_not_zero(&vma->vm_mm->mm_users)) + continue; - vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL); - if (!vi) - return ERR_PTR(-ENOMEM); + info = prev; + prev = prev->next; + info->next = curr; + curr = info; - mutex_lock(&mapping->i_mmap_mutex); - retvi = __find_next_vma_info(mapping, head, vi, offset, is_register); + info->mm = vma->vm_mm; + info->vaddr = vma_address(vma, offset); + } mutex_unlock(&mapping->i_mmap_mutex); - if (!retvi) - kfree(vi); + if (!more) + goto out; + + prev = curr; + while (curr) { + mmput(curr->mm); + curr = curr->next; + } - return retvi; + do { + info = kmalloc(sizeof(struct map_info), GFP_KERNEL); + if (!info) { + curr = ERR_PTR(-ENOMEM); + goto out; + } + info->next = prev; + prev = info; + } while (--more); + + goto again; + out: + while (prev) + prev = free_map_info(prev); + return curr; } static int register_for_each_vma(struct uprobe *uprobe, bool is_register) { - struct list_head try_list; - struct vm_area_struct *vma; - struct address_space *mapping; - struct vma_info *vi, *tmpvi; - struct mm_struct *mm; - loff_t vaddr; - int ret; + struct map_info *info; + int err = 0; - mapping = uprobe->inode->i_mapping; - INIT_LIST_HEAD(&try_list); - - ret = 0; + info = build_map_info(uprobe->inode->i_mapping, + uprobe->offset, is_register); + if (IS_ERR(info)) + return PTR_ERR(info); - for (;;) { - vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register); - if (!vi) - break; + while (info) { + struct mm_struct *mm = info->mm; + struct vm_area_struct *vma; + loff_t vaddr; - if (IS_ERR(vi)) { - ret = PTR_ERR(vi); - break; - } + if (err) + goto free; - mm = vi->mm; down_write(&mm->mmap_sem); - vma = find_vma(mm, (unsigned long)vi->vaddr); - if (!vma || !valid_vma(vma, is_register)) { - list_del(&vi->probe_list); - kfree(vi); - up_write(&mm->mmap_sem); - mmput(mm); - continue; - } + vma = find_vma(mm, (unsigned long)info->vaddr); + if (!vma || !valid_vma(vma, is_register)) + goto unlock; + vaddr = vma_address(vma, uprobe->offset); if (vma->vm_file->f_mapping->host != uprobe->inode || - vaddr != vi->vaddr) { - list_del(&vi->probe_list); - kfree(vi); - up_write(&mm->mmap_sem); - mmput(mm); - continue; - } + vaddr != info->vaddr) + goto unlock; - if (is_register) - ret = install_breakpoint(uprobe, mm, vma, vi->vaddr); - else - remove_breakpoint(uprobe, mm, vi->vaddr); - - up_write(&mm->mmap_sem); - mmput(mm); if (is_register) { - if (ret && ret == -EEXIST) - ret = 0; - if (ret) - break; + err = install_breakpoint(uprobe, mm, vma, info->vaddr); + if (err == -EEXIST) + err = 0; + } else { + remove_breakpoint(uprobe, mm, info->vaddr); } + unlock: + up_write(&mm->mmap_sem); + free: + mmput(mm); + info = free_map_info(info); } - list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) { - list_del(&vi->probe_list); - kfree(vi); - } - - return ret; + return err; } static int __uprobe_register(struct uprobe *uprobe) -- cgit v1.2.3 From 7a5bfb66b07f22d2429db776da7bb8b57bfb5cff Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:36 +0200 Subject: uprobes: Change build_map_info() to try kmalloc(GFP_NOWAIT) first build_map_info() doesn't allocate the memory under i_mmap_mutex to avoid the deadlock with page reclaim. But it can try GFP_NOWAIT first, it should work in the likely case and thus we almost never need the pre-alloc-and-retry path. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Link: http://lkml.kernel.org/r/20120615154336.GA9588@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 4e0db3496d70..897417dbca8e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -761,6 +761,16 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) if (!valid_vma(vma, is_register)) continue; + if (!prev && !more) { + /* + * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through + * reclaim. This is optimistic, no harm done if it fails. + */ + prev = kmalloc(sizeof(struct map_info), + GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (prev) + prev->next = NULL; + } if (!prev) { more++; continue; -- cgit v1.2.3 From c5784de2b351fe871bb57487878f7fc7ec5b075c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 Jun 2012 17:43:39 +0200 Subject: uprobes: Document uprobe_register() vs uprobe_mmap() race Because the mind is treacherous and makes us forget we need to write stuff down. Signed-off-by: Peter Zijlstra Signed-off-by: Oleg Nesterov Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120615154339.GA9591@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 897417dbca8e..2671d9ad49be 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -44,6 +44,23 @@ static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ #define UPROBES_HASH_SZ 13 +/* + * We need separate register/unregister and mmap/munmap lock hashes because + * of mmap_sem nesting. + * + * uprobe_register() needs to install probes on (potentially) all processes + * and thus needs to acquire multiple mmap_sems (consequtively, not + * concurrently), whereas uprobe_mmap() is called while holding mmap_sem + * for the particular process doing the mmap. + * + * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem + * because of lock order against i_mmap_mutex. This means there's a hole in + * the register vma iteration where a mmap() can happen. + * + * Thus uprobe_register() can race with uprobe_mmap() and we can try and + * install a probe where one is already installed. + */ + /* serialize (un)register */ static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; @@ -339,7 +356,9 @@ out: int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { int result; - + /* + * See the comment near uprobes_hash(). + */ result = is_swbp_at_addr(mm, vaddr); if (result == 1) return -EEXIST; @@ -845,6 +864,10 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) if (is_register) { err = install_breakpoint(uprobe, mm, vma, info->vaddr); + /* + * We can race against uprobe_mmap(), see the + * comment near uprobe_hash(). + */ if (err == -EEXIST) err = 0; } else { @@ -1054,8 +1077,10 @@ int uprobe_mmap(struct vm_area_struct *vma) } ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); - - /* Ignore double add: */ + /* + * We can race against uprobe_register(), see the + * comment near uprobe_hash(). + */ if (ret == -EEXIST) { ret = 0; -- cgit v1.2.3 From d436615e60c386095dac4a9bf72b08868d2a7564 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:42 +0200 Subject: uprobes: Copy_insn() shouldn't depend on mm/vma/vaddr 1. copy_insn() doesn't need "addr", it can use uprobe->offset. Remove this argument. 2. Change copy_insn/__copy_insn to accept "struct file*" instead of vma. copy_insn() is called only once and mm/vma/vaddr are random, it shouldn't depend on them. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120615154342.GA9598@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2671d9ad49be..08ef566da763 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -591,10 +591,9 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) } static int -__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn, +__copy_insn(struct address_space *mapping, struct file *filp, char *insn, unsigned long nbytes, unsigned long offset) { - struct file *filp = vma->vm_file; struct page *page; void *vaddr; unsigned long off1; @@ -625,15 +624,13 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *ins return 0; } -static int -copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) +static int copy_insn(struct uprobe *uprobe, struct file *filp) { struct address_space *mapping; unsigned long nbytes; int bytes; - addr &= ~PAGE_MASK; - nbytes = PAGE_SIZE - addr; + nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK); mapping = uprobe->inode->i_mapping; /* Instruction at end of binary; copy only available bytes */ @@ -644,13 +641,13 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) /* Instruction at the page-boundary; copy bytes in second page */ if (nbytes < bytes) { - if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes, + if (__copy_insn(mapping, filp, uprobe->arch.insn + nbytes, bytes - nbytes, uprobe->offset + nbytes)) return -ENOMEM; bytes = nbytes; } - return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset); + return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); } /* @@ -696,7 +693,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, addr = (unsigned long)vaddr; if (!(uprobe->flags & UPROBE_COPY_INSN)) { - ret = copy_insn(uprobe, vma, addr); + ret = copy_insn(uprobe, vma->vm_file); if (ret) return ret; -- cgit v1.2.3 From fc36f59565861af2e897225bc3782479a26c5d5a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:44 +0200 Subject: uprobes: Copy_insn() should not return -ENOMEM if __copy_insn() fails copy_insn() returns -ENOMEM if the first __copy_insn() fails, it should return the correct error code. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120615154344.GA9601@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 08ef566da763..2db1d94d7dfc 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -641,10 +641,10 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp) /* Instruction at the page-boundary; copy bytes in second page */ if (nbytes < bytes) { - if (__copy_insn(mapping, filp, uprobe->arch.insn + nbytes, - bytes - nbytes, uprobe->offset + nbytes)) - return -ENOMEM; - + int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes, + bytes - nbytes, uprobe->offset + nbytes); + if (err) + return err; bytes = nbytes; } return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); -- cgit v1.2.3 From eb2bf57bee42c7565032f93adaa211e2c9fcc52c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:47 +0200 Subject: uprobes: No need to re-check vma_address() in write_opcode() write_opcode() is called by register_for_each_vma() and uprobe_mmap() paths. In both cases the caller has already verified this vaddr under mmap_sem, no need to re-check. Note also that this check is wrong anyway, we should not truncate loff_t returned by vma_address() if we do not trust this mapping. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120615154347.GA9604@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2db1d94d7dfc..14c71a2aadad 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -211,7 +211,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, struct vm_area_struct *vma; struct uprobe *uprobe; unsigned long pgoff; - loff_t addr; int ret; retry: /* Read the page with vaddr into memory */ @@ -235,10 +234,6 @@ retry: if (mapping != vma->vm_file->f_mapping) goto put_out; - addr = vma_address(vma, uprobe->offset); - if (vaddr != (unsigned long)addr) - goto put_out; - ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) -- cgit v1.2.3 From d9c4a30e82614d43b55893a73f31e7284007ce82 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:50 +0200 Subject: uprobes: Move BUG_ON(UPROBE_SWBP_INSN_SIZE) from write_opcode() to install_breakpoint() write_opcode() ensures that UPROBE_SWBP_INSN doesn't cross the page boundary. This looks a bit confusing, the check does not depend on vaddr and it is enough to do it only once right after install_breakpoint()->arch_uprobe_analyze_insn(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120615154350.GA9611@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 14c71a2aadad..b9c61bda9029 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -210,7 +210,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, void *vaddr_old, *vaddr_new; struct vm_area_struct *vma; struct uprobe *uprobe; - unsigned long pgoff; int ret; retry: /* Read the page with vaddr into memory */ @@ -251,11 +250,7 @@ retry: vaddr_new = kmap_atomic(new_page); memcpy(vaddr_new, vaddr_old, PAGE_SIZE); - - /* poke the new insn in, ASSUMES we don't cross page boundary */ - pgoff = (vaddr & ~PAGE_MASK); - BUG_ON(pgoff + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); - memcpy(vaddr_new + pgoff, &opcode, UPROBE_SWBP_INSN_SIZE); + memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE); kunmap_atomic(vaddr_new); kunmap_atomic(vaddr_old); @@ -699,6 +694,10 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, if (ret) return ret; + /* write_opcode() assumes we don't cross page boundary */ + BUG_ON((uprobe->offset & ~PAGE_MASK) + + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); + uprobe->flags |= UPROBE_COPY_INSN; } -- cgit v1.2.3 From 449d0d7c9fb87277175db34c009c70cb348004a8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:53 +0200 Subject: uprobes: Simplify the usage of uprobe->pending_list uprobe->pending_list is only used to create the temporary list, it has no meaning after we drop uprobes_mmap_hash(inode). No need to initialize this node or remove it from tmp_list, and we can use list_for_each_entry(). Signed-off-by: Oleg Nesterov Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120615154353.GA9614@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index b9c61bda9029..7d5c78f063ae 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -513,7 +513,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->inode = igrab(inode); uprobe->offset = offset; init_rwsem(&uprobe->consumer_rwsem); - INIT_LIST_HEAD(&uprobe->pending_list); /* add to uprobes_tree, sorted on inode:offset */ cur_uprobe = insert_uprobe(uprobe); @@ -1037,7 +1036,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head) int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; - struct uprobe *uprobe, *u; + struct uprobe *uprobe; struct inode *inode; int ret, count; @@ -1055,10 +1054,9 @@ int uprobe_mmap(struct vm_area_struct *vma) ret = 0; count = 0; - list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { + list_for_each_entry(uprobe, &tmp_list, pending_list) { loff_t vaddr; - list_del(&uprobe->pending_list); if (!ret) { vaddr = vma_address(vma, uprobe->offset); @@ -1106,7 +1104,7 @@ int uprobe_mmap(struct vm_area_struct *vma) void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct list_head tmp_list; - struct uprobe *uprobe, *u; + struct uprobe *uprobe; struct inode *inode; if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) @@ -1123,12 +1121,10 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, &tmp_list); - list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { + list_for_each_entry(uprobe, &tmp_list, pending_list) { loff_t vaddr; - list_del(&uprobe->pending_list); vaddr = vma_address(vma, uprobe->offset); - if (vaddr >= start && vaddr < end) { /* * An unregister could have removed the probe before -- cgit v1.2.3 From 816c03fbabe64fa09f66fbb64e932081af381415 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:55 +0200 Subject: uprobes: Don't use loff_t for the valid virtual address loff_t looks confusing when it is used for the virtual address. Change map_info and install_breakpoint/remove_breakpoint paths to use "unsigned long". The patch doesn't change vma_address(), it can't return "long" because it is used to verify the mapping. But probably this needs some cleanups too. Signed-off-by: Oleg Nesterov Signed-off-by: Anton Arapov Acked-by: Srikar Dronamraju Acked-by: Ananth N Mavinakayanahalli Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120615154355.GA9622@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7d5c78f063ae..4df84b76dd48 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -664,9 +664,8 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp) */ static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, - struct vm_area_struct *vma, loff_t vaddr) + struct vm_area_struct *vma, unsigned long vaddr) { - unsigned long addr; int ret; /* @@ -679,8 +678,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, if (!uprobe->consumers) return -EEXIST; - addr = (unsigned long)vaddr; - if (!(uprobe->flags & UPROBE_COPY_INSN)) { ret = copy_insn(uprobe, vma->vm_file); if (ret) @@ -689,7 +686,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) return -ENOTSUPP; - ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, addr); + ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); if (ret) return ret; @@ -709,7 +706,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, * Hence increment before and decrement on failure. */ atomic_inc(&mm->uprobes_state.count); - ret = set_swbp(&uprobe->arch, mm, addr); + ret = set_swbp(&uprobe->arch, mm, vaddr); if (ret) atomic_dec(&mm->uprobes_state.count); @@ -717,9 +714,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, } static void -remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr) +remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { - if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true)) + if (!set_orig_insn(&uprobe->arch, mm, vaddr, true)) atomic_dec(&mm->uprobes_state.count); } @@ -743,7 +740,7 @@ static void delete_uprobe(struct uprobe *uprobe) struct map_info { struct map_info *next; struct mm_struct *mm; - loff_t vaddr; + unsigned long vaddr; }; static inline struct map_info *free_map_info(struct map_info *info) @@ -837,7 +834,6 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) while (info) { struct mm_struct *mm = info->mm; struct vm_area_struct *vma; - loff_t vaddr; if (err) goto free; @@ -847,9 +843,8 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) if (!vma || !valid_vma(vma, is_register)) goto unlock; - vaddr = vma_address(vma, uprobe->offset); if (vma->vm_file->f_mapping->host != uprobe->inode || - vaddr != info->vaddr) + vma_address(vma, uprobe->offset) != info->vaddr) goto unlock; if (is_register) { @@ -1055,10 +1050,8 @@ int uprobe_mmap(struct vm_area_struct *vma) count = 0; list_for_each_entry(uprobe, &tmp_list, pending_list) { - loff_t vaddr; - if (!ret) { - vaddr = vma_address(vma, uprobe->offset); + loff_t vaddr = vma_address(vma, uprobe->offset); if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { put_uprobe(uprobe); @@ -1122,9 +1115,8 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon build_probe_list(inode, &tmp_list); list_for_each_entry(uprobe, &tmp_list, pending_list) { - loff_t vaddr; + loff_t vaddr = vma_address(vma, uprobe->offset); - vaddr = vma_address(vma, uprobe->offset); if (vaddr >= start && vaddr < end) { /* * An unregister could have removed the probe before -- cgit v1.2.3 From 593609a59600c8377f311b300f14deacb155b9a4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:43:59 +0200 Subject: uprobes: __copy_insn() needs "loff_t offset" 1. __copy_insn() needs "loff_t offset", not "unsigned long", to read the file. 2. use pgoff_t for "idx" and remove the unnecessary typecast. 3. fix the typo, "&=" is not what we want 4. can't resist, rename off1 to off. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120615154359.GA9625@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 4df84b76dd48..d1b2eeb80837 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -581,12 +581,12 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) static int __copy_insn(struct address_space *mapping, struct file *filp, char *insn, - unsigned long nbytes, unsigned long offset) + unsigned long nbytes, loff_t offset) { struct page *page; void *vaddr; - unsigned long off1; - unsigned long idx; + unsigned long off; + pgoff_t idx; if (!filp) return -EINVAL; @@ -594,8 +594,8 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn, if (!mapping->a_ops->readpage) return -EIO; - idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT); - off1 = offset &= ~PAGE_MASK; + idx = offset >> PAGE_CACHE_SHIFT; + off = offset & ~PAGE_MASK; /* * Ensure that the page that has the original instruction is @@ -606,7 +606,7 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn, return PTR_ERR(page); vaddr = kmap_atomic(page); - memcpy(insn, vaddr + off1, nbytes); + memcpy(insn, vaddr + off, nbytes); kunmap_atomic(vaddr); page_cache_release(page); -- cgit v1.2.3 From e227051b13956b8f71c0abecc41ad351e64671c8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 15 Jun 2012 17:44:01 +0200 Subject: uprobes: Remove the unnecessary initialization in add_utask() Trivial cleanup. No need to nullify ->active_uprobe after kzalloc(). Signed-off-by: Oleg Nesterov Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Peter Zijlstra Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120615154401.GA9633@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d1b2eeb80837..f93532748bca 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1392,7 +1392,6 @@ static struct uprobe_task *add_utask(void) if (unlikely(!utask)) return NULL; - utask->active_uprobe = NULL; current->utask = utask; return utask; } -- cgit v1.2.3 From 650513979a437c32d7a0a84f0ed952a55bbb5583 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sat, 16 Jun 2012 21:47:37 -0700 Subject: x86-64, reboot: Allow reboot=bios and reboot-cpu override on x86-64 With the revamped realmode trampoline code, it is trivial to extend support for reboot=bios to x86-64. Furthermore, while we are at it, remove the restriction that only we can only override the reboot CPU on 32 bits. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/n/tip-jopx7y6g6dbcx4tpal8q0jlr@git.kernel.org --- arch/x86/include/asm/emergency-restart.h | 2 - arch/x86/include/asm/realmode.h | 3 +- arch/x86/include/asm/reboot.h | 4 +- arch/x86/kernel/reboot.c | 52 +++++------ arch/x86/realmode/rm/Makefile | 2 +- arch/x86/realmode/rm/header.S | 4 +- arch/x86/realmode/rm/reboot.S | 152 +++++++++++++++++++++++++++++++ arch/x86/realmode/rm/reboot_32.S | 132 --------------------------- 8 files changed, 184 insertions(+), 167 deletions(-) create mode 100644 arch/x86/realmode/rm/reboot.S delete mode 100644 arch/x86/realmode/rm/reboot_32.S diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index cc70c1c78ca4..75ce3f47d204 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h @@ -4,9 +4,7 @@ enum reboot_type { BOOT_TRIPLE = 't', BOOT_KBD = 'k', -#ifdef CONFIG_X86_32 BOOT_BIOS = 'b', -#endif BOOT_ACPI = 'a', BOOT_EFI = 'e', BOOT_CF9 = 'p', diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index fce3f4ae5bd6..fe1ec5bcd846 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -21,8 +21,9 @@ struct real_mode_header { u32 wakeup_header; #endif /* APM/BIOS reboot */ -#ifdef CONFIG_X86_32 u32 machine_real_restart_asm; +#ifdef CONFIG_X86_64 + u32 machine_real_restart_seg; #endif }; diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 92f297069e87..a82c4f1b4d83 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -18,8 +18,8 @@ extern struct machine_ops machine_ops; void native_machine_crash_shutdown(struct pt_regs *regs); void native_machine_shutdown(void); -void machine_real_restart(unsigned int type); -/* These must match dispatch_table in reboot_32.S */ +void __noreturn machine_real_restart(unsigned int type); +/* These must match dispatch in arch/x86/realmore/rm/reboot.S */ #define MRR_BIOS 0 #define MRR_APM 1 diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 25b48edb847c..6ddb9cd0ced0 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -20,14 +20,12 @@ #include #include #include +#include -#ifdef CONFIG_X86_32 -# include -# include -# include -#else -# include -#endif +#include +#include +#include +#include /* * Power off function, if any @@ -49,7 +47,7 @@ int reboot_force; */ static int reboot_default = 1; -#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) +#ifdef CONFIG_SMP static int reboot_cpu = -1; #endif @@ -67,8 +65,8 @@ bool port_cf9_safe = false; * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] * warm Don't set the cold reboot flag * cold Set the cold reboot flag - * bios Reboot by jumping through the BIOS (only for X86_32) - * smp Reboot by executing reset on BSP or other CPU (only for X86_32) + * bios Reboot by jumping through the BIOS + * smp Reboot by executing reset on BSP or other CPU * triple Force a triple fault (init) * kbd Use the keyboard controller. cold reset (default) * acpi Use the RESET_REG in the FADT @@ -95,7 +93,6 @@ static int __init reboot_setup(char *str) reboot_mode = 0; break; -#ifdef CONFIG_X86_32 #ifdef CONFIG_SMP case 's': if (isdigit(*(str+1))) { @@ -112,7 +109,6 @@ static int __init reboot_setup(char *str) #endif /* CONFIG_SMP */ case 'b': -#endif case 'a': case 'k': case 't': @@ -138,7 +134,6 @@ static int __init reboot_setup(char *str) __setup("reboot=", reboot_setup); -#ifdef CONFIG_X86_32 /* * Reboot options and system auto-detection code provided by * Dell Inc. so their systems "just work". :-) @@ -157,11 +152,8 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) return 0; } -void machine_real_restart(unsigned int type) +void __noreturn machine_real_restart(unsigned int type) { - void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int)) - real_mode_header->machine_real_restart_asm; - local_irq_disable(); /* @@ -181,7 +173,11 @@ void machine_real_restart(unsigned int type) /* * Switch back to the initial page table. */ +#ifdef CONFIG_X86_32 load_cr3(initial_page_table); +#else + write_cr3(real_mode_header->trampoline_pgd); +#endif /* * Write 0x1234 to absolute memory location 0x472. The BIOS reads @@ -192,14 +188,21 @@ void machine_real_restart(unsigned int type) *((unsigned short *)0x472) = reboot_mode; /* Jump to the identity-mapped low memory code */ - restart_lowmem(type); +#ifdef CONFIG_X86_32 + asm volatile("jmpl *%0" : : + "rm" (real_mode_header->machine_real_restart_asm), + "a" (type)); +#else + asm volatile("ljmpl *%0" : : + "m" (real_mode_header->machine_real_restart_asm), + "D" (type)); +#endif + unreachable(); } #ifdef CONFIG_APM_MODULE EXPORT_SYMBOL(machine_real_restart); #endif -#endif /* CONFIG_X86_32 */ - /* * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot */ @@ -223,11 +226,9 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d) } /* - * This is a single dmi_table handling all reboot quirks. Note that - * REBOOT_BIOS is only available for 32bit + * This is a single dmi_table handling all reboot quirks. */ static struct dmi_system_id __initdata reboot_dmi_table[] = { -#ifdef CONFIG_X86_32 { /* Handle problems with rebooting on Dell E520's */ .callback = set_bios_reboot, .ident = "Dell E520", @@ -377,7 +378,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, -#endif /* CONFIG_X86_32 */ { /* Handle reboot issue on Acer Aspire one */ .callback = set_kbd_reboot, @@ -576,13 +576,11 @@ static void native_machine_emergency_restart(void) reboot_type = BOOT_KBD; break; -#ifdef CONFIG_X86_32 case BOOT_BIOS: machine_real_restart(MRR_BIOS); reboot_type = BOOT_KBD; break; -#endif case BOOT_ACPI: acpi_reboot(); @@ -624,12 +622,10 @@ void native_machine_shutdown(void) /* The boot cpu is always logical cpu 0 */ int reboot_cpu_id = 0; -#ifdef CONFIG_X86_32 /* See if there has been given a command line override */ if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && cpu_online(reboot_cpu)) reboot_cpu_id = reboot_cpu; -#endif /* Make certain the cpu I'm about to reboot on is online */ if (!cpu_online(reboot_cpu_id)) diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 5b84a2d30888..b2d534cab25f 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -22,7 +22,7 @@ wakeup-objs += video-bios.o realmode-y += header.o realmode-y += trampoline_$(BITS).o realmode-y += stack.o -realmode-$(CONFIG_X86_32) += reboot_32.o +realmode-y += reboot.o realmode-$(CONFIG_ACPI_SLEEP) += $(wakeup-objs) targets += $(realmode-y) diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index fadf48378ada..a28221d94e69 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -6,6 +6,7 @@ #include #include +#include #include "realmode.h" @@ -28,8 +29,9 @@ GLOBAL(real_mode_header) .long pa_wakeup_header #endif /* APM/BIOS reboot */ -#ifdef CONFIG_X86_32 .long pa_machine_real_restart_asm +#ifdef CONFIG_X86_64 + .long __KERNEL32_CS #endif END(real_mode_header) diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S new file mode 100644 index 000000000000..6bf8feac5557 --- /dev/null +++ b/arch/x86/realmode/rm/reboot.S @@ -0,0 +1,152 @@ +#include +#include +#include +#include +#include +#include +#include "realmode.h" + +/* + * The following code and data reboots the machine by switching to real + * mode and jumping to the BIOS reset entry point, as if the CPU has + * really been reset. The previous version asked the keyboard + * controller to pulse the CPU reset line, which is more thorough, but + * doesn't work with at least one type of 486 motherboard. It is easy + * to stop this code working; hence the copious comments. + * + * This code is called with the restart type (0 = BIOS, 1 = APM) in + * the primary argument register (%eax for 32 bit, %edi for 64 bit). + */ + .section ".text32", "ax" + .code32 +ENTRY(machine_real_restart_asm) + +#ifdef CONFIG_X86_64 + + /* Disable paging to drop us out of long mode */ + movl %cr0, %eax + andl $~X86_CR0_PG, %eax + movl %eax, %cr0 + jmp 1f /* "A branch" may be needed here, assume near is OK */ + +1: + xorl %eax, %eax + xorl %edx, %edx + movl $MSR_EFER, %ecx + wrmsr + + movl %edi, %eax + +#endif /* CONFIG_X86_64 */ + + /* Set up the IDT for real mode. */ + lidtl pa_machine_real_restart_idt + + /* + * Set up a GDT from which we can load segment descriptors for real + * mode. The GDT is not used in real mode; it is just needed here to + * prepare the descriptors. + */ + lgdtl pa_machine_real_restart_gdt + + /* + * Load the data segment registers with 16-bit compatible values + */ + movl $16, %ecx + movl %ecx, %ds + movl %ecx, %es + movl %ecx, %fs + movl %ecx, %gs + movl %ecx, %ss + ljmpw $8, $1f + +/* + * This is 16-bit protected mode code to disable paging and the cache, + * switch to real mode and jump to the BIOS reset code. + * + * The instruction that switches to real mode by writing to CR0 must be + * followed immediately by a far jump instruction, which set CS to a + * valid value for real mode, and flushes the prefetch queue to avoid + * running instructions that have already been decoded in protected + * mode. + * + * Clears all the flags except ET, especially PG (paging), PE + * (protected-mode enable) and TS (task switch for coprocessor state + * save). Flushes the TLB after paging has been disabled. Sets CD and + * NW, to disable the cache on a 486, and invalidates the cache. This + * is more like the state of a 486 after reset. I don't know if + * something else should be done for other chips. + * + * More could be done here to set up the registers as if a CPU reset had + * occurred; hopefully real BIOSs don't assume much. This is not the + * actual BIOS entry point, anyway (that is at 0xfffffff0). + * + * Most of this work is probably excessive, but it is what is tested. + */ + .text + .code16 + + .balign 16 +machine_real_restart_asm16: +1: + xorl %ecx, %ecx + movl %cr0, %edx + andl $0x00000011, %edx + orl $0x60000000, %edx + movl %edx, %cr0 + movl %ecx, %cr3 + movl %cr0, %edx + testl $0x60000000, %edx /* If no cache bits -> no wbinvd */ + jz 2f + wbinvd +2: + andb $0x10, %dl + movl %edx, %cr0 + LJMPW_RM(3f) +3: + andw %ax, %ax + jz bios + +apm: + movw $0x1000, %ax + movw %ax, %ss + movw $0xf000, %sp + movw $0x5307, %ax + movw $0x0001, %bx + movw $0x0003, %cx + int $0x15 + /* This should never return... */ + +bios: + ljmpw $0xf000, $0xfff0 + + .section ".rodata", "a" + + .balign 16 +GLOBAL(machine_real_restart_idt) + .word 0xffff /* Length - real mode default value */ + .long 0 /* Base - real mode default value */ +END(machine_real_restart_idt) + + .balign 16 +GLOBAL(machine_real_restart_gdt) + /* Self-pointer */ + .word 0xffff /* Length - real mode default value */ + .long pa_machine_real_restart_gdt + .word 0 + + /* + * 16-bit code segment pointing to real_mode_seg + * Selector value 8 + */ + .word 0xffff /* Limit */ + .long 0x9b000000 + pa_real_mode_base + .word 0 + + /* + * 16-bit data segment with the selector value 16 = 0x10 and + * base value 0x100; since this is consistent with real mode + * semantics we don't have to reload the segments once CR0.PE = 0. + */ + .quad GDT_ENTRY(0x0093, 0x100, 0xffff) +END(machine_real_restart_gdt) diff --git a/arch/x86/realmode/rm/reboot_32.S b/arch/x86/realmode/rm/reboot_32.S deleted file mode 100644 index 114044876b3d..000000000000 --- a/arch/x86/realmode/rm/reboot_32.S +++ /dev/null @@ -1,132 +0,0 @@ -#include -#include -#include -#include -#include "realmode.h" - -/* - * The following code and data reboots the machine by switching to real - * mode and jumping to the BIOS reset entry point, as if the CPU has - * really been reset. The previous version asked the keyboard - * controller to pulse the CPU reset line, which is more thorough, but - * doesn't work with at least one type of 486 motherboard. It is easy - * to stop this code working; hence the copious comments. - * - * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. - */ - .section ".text32", "ax" - .code32 - - .balign 16 -ENTRY(machine_real_restart_asm) - /* Set up the IDT for real mode. */ - lidtl pa_machine_real_restart_idt - - /* - * Set up a GDT from which we can load segment descriptors for real - * mode. The GDT is not used in real mode; it is just needed here to - * prepare the descriptors. - */ - lgdtl pa_machine_real_restart_gdt - - /* - * Load the data segment registers with 16-bit compatible values - */ - movl $16, %ecx - movl %ecx, %ds - movl %ecx, %es - movl %ecx, %fs - movl %ecx, %gs - movl %ecx, %ss - ljmpw $8, $1f - -/* - * This is 16-bit protected mode code to disable paging and the cache, - * switch to real mode and jump to the BIOS reset code. - * - * The instruction that switches to real mode by writing to CR0 must be - * followed immediately by a far jump instruction, which set CS to a - * valid value for real mode, and flushes the prefetch queue to avoid - * running instructions that have already been decoded in protected - * mode. - * - * Clears all the flags except ET, especially PG (paging), PE - * (protected-mode enable) and TS (task switch for coprocessor state - * save). Flushes the TLB after paging has been disabled. Sets CD and - * NW, to disable the cache on a 486, and invalidates the cache. This - * is more like the state of a 486 after reset. I don't know if - * something else should be done for other chips. - * - * More could be done here to set up the registers as if a CPU reset had - * occurred; hopefully real BIOSs don't assume much. This is not the - * actual BIOS entry point, anyway (that is at 0xfffffff0). - * - * Most of this work is probably excessive, but it is what is tested. - */ - .text - .code16 - - .balign 16 -machine_real_restart_asm16: -1: - xorl %ecx, %ecx - movl %cr0, %edx - andl $0x00000011, %edx - orl $0x60000000, %edx - movl %edx, %cr0 - movl %ecx, %cr3 - movl %cr0, %edx - testl $0x60000000, %edx /* If no cache bits -> no wbinvd */ - jz 2f - wbinvd -2: - andb $0x10, %dl - movl %edx, %cr0 - LJMPW_RM(3f) -3: - andw %ax, %ax - jz bios - -apm: - movw $0x1000, %ax - movw %ax, %ss - movw $0xf000, %sp - movw $0x5307, %ax - movw $0x0001, %bx - movw $0x0003, %cx - int $0x15 - /* This should never return... */ - -bios: - ljmpw $0xf000, $0xfff0 - - .section ".rodata", "a" - - .balign 16 -GLOBAL(machine_real_restart_idt) - .word 0xffff /* Length - real mode default value */ - .long 0 /* Base - real mode default value */ -END(machine_real_restart_idt) - - .balign 16 -GLOBAL(machine_real_restart_gdt) - /* Self-pointer */ - .word 0xffff /* Length - real mode default value */ - .long pa_machine_real_restart_gdt - .word 0 - - /* - * 16-bit code segment pointing to real_mode_seg - * Selector value 8 - */ - .word 0xffff /* Limit */ - .long 0x9b000000 + pa_real_mode_base - .word 0 - - /* - * 16-bit data segment with the selector value 16 = 0x10 and - * base value 0x100; since this is consistent with real mode - * semantics we don't have to reload the segments once CR0.PE = 0. - */ - .quad GDT_ENTRY(0x0093, 0x100, 0xffff) -END(machine_real_restart_gdt) -- cgit v1.2.3 From abf71f3066740f3b59c3f731b4b68ed335f7b24d Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Fri, 15 Jun 2012 18:10:55 +0300 Subject: x86/vsmp: Fix vector_allocation_domain's return value Commit 8637e38a ("x86/apic: Avoid useless scanning thru a cpumask in assign_irq_vector()") modified vector_allocation_domain() to return a boolean indicating if cpumask is dynamic or static. Adjust vSMP's callback implementation accordingly. Signed-off-by: Ido Yariv Acked-by: Shai Fultheim Cc: Alexander Gordeev Link: http://lkml.kernel.org/r/1339773055-27397-1-git-send-email-ido@wizery.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 3f0285ac00fa..fa5adb7c228c 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -208,9 +208,10 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) * In vSMP, all cpus should be capable of handling interrupts, regardless of * the APIC used. */ -static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask) +static bool fill_vector_allocation_domain(int cpu, struct cpumask *retmask) { cpumask_setall(retmask); + return false; } static void vsmp_apic_post_init(void) -- cgit v1.2.3 From 76958a61e42fb6277a8431eb17e4bdb24176f1b7 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 15 Jun 2012 19:06:44 +0200 Subject: perf/x86/amd: Fix RDPMC index calculation for AMD family 15h The RDPMC index calculation is wrong for AMD family 15h (X86_FEATURE_ PERFCTR_CORE set). This leads to a #GP when accessing the counter: Pid: 2237, comm: syslog-ng Not tainted 3.5.0-rc1-perf-x86_64-standard-g130ff90 #135 AMD Pike/Pike RIP: 0010:[] [] x86_perf_event_update+0x27/0x66 While the msr address offset is (index << 1) we must use index to select the correct rdpmc. Signed-off-by: Robert Richter Acked-by: Peter Zijlstra Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 766c76d5ec4c..d1f38c9509d0 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -823,7 +823,7 @@ static inline void x86_assign_hw_event(struct perf_event *event, } else { hwc->config_base = x86_pmu_config_addr(hwc->idx); hwc->event_base = x86_pmu_event_addr(hwc->idx); - hwc->event_base_rdpmc = x86_pmu_addr_offset(hwc->idx); + hwc->event_base_rdpmc = hwc->idx; } } -- cgit v1.2.3 From 4b4969b14490a4f65b572b8f180164181104b5e1 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 15 Jun 2012 14:31:30 +0800 Subject: perf: Export perf_assign_events() Export perf_assign_events() so the uncore code can use it to schedule events. Signed-off-by: Zheng Yan Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1339741902-8449-2-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 +++--- arch/x86/kernel/cpu/perf_event.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d1f38c9509d0..6d32aefc9dbd 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -626,7 +626,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) c = sched->constraints[sched->state.event]; /* Prefer fixed purpose counters */ - if (x86_pmu.num_counters_fixed) { + if (c->idxmsk64 & (~0ULL << X86_PMC_IDX_FIXED)) { idx = X86_PMC_IDX_FIXED; for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { if (!__test_and_set_bit(idx, sched->state.used)) @@ -693,8 +693,8 @@ static bool perf_sched_next_event(struct perf_sched *sched) /* * Assign a counter for each event. */ -static int perf_assign_events(struct event_constraint **constraints, int n, - int wmin, int wmax, int *assign) +int perf_assign_events(struct event_constraint **constraints, int n, + int wmin, int wmax, int *assign) { struct perf_sched sched; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 3df3de9452a9..83238f2a12b2 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -481,6 +481,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, void x86_pmu_enable_all(int added); +int perf_assign_events(struct event_constraint **constraints, int n, + int wmin, int wmax, int *assign); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); void x86_pmu_stop(struct perf_event *event, int flags); -- cgit v1.2.3 From fbfc623f8231c8d8c78aab5841e9c6e5811ab638 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 15 Jun 2012 14:31:31 +0800 Subject: perf: Avoid race between cpu hotplug and installing event perf_event_open() requires the cpu on which to install event is online, but the cpu can go offline after perf_event_open checks that. Add a get_online_cpus()/put_online_cpus() pair to avoid the race. Signed-off-by: Zheng Yan Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1339741902-8449-3-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index d7d71d6ec972..31d182e01549 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6252,6 +6252,8 @@ SYSCALL_DEFINE5(perf_event_open, } } + get_online_cpus(); + event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL, NULL); if (IS_ERR(event)) { @@ -6391,6 +6393,8 @@ SYSCALL_DEFINE5(perf_event_open, perf_unpin_context(ctx); mutex_unlock(&ctx->mutex); + put_online_cpus(); + event->owner = current; mutex_lock(¤t->perf_event_mutex); @@ -6419,6 +6423,7 @@ err_context: err_alloc: free_event(event); err_task: + put_online_cpus(); if (task) put_task_struct(task); err_group_fd: -- cgit v1.2.3 From e2d37cd213dcc0aeb3db4b37b9bd1710fe36fbf7 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 15 Jun 2012 14:31:32 +0800 Subject: perf: Allow the PMU driver to choose the CPU on which to install events Allow the pmu->event_init callback to change event->cpu, so the PMU driver can choose the CPU on which to install events. Signed-off-by: Zheng Yan Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1339741902-8449-4-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 31d182e01549..fa36a39e8bb7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6306,7 +6306,7 @@ SYSCALL_DEFINE5(perf_event_open, /* * Get the target context (task or percpu): */ - ctx = find_get_context(pmu, task, cpu); + ctx = find_get_context(pmu, task, event->cpu); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_alloc; @@ -6379,16 +6379,16 @@ SYSCALL_DEFINE5(perf_event_open, mutex_lock(&ctx->mutex); if (move_group) { - perf_install_in_context(ctx, group_leader, cpu); + perf_install_in_context(ctx, group_leader, event->cpu); get_ctx(ctx); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { - perf_install_in_context(ctx, sibling, cpu); + perf_install_in_context(ctx, sibling, event->cpu); get_ctx(ctx); } } - perf_install_in_context(ctx, event, cpu); + perf_install_in_context(ctx, event, event->cpu); ++ctx->generation; perf_unpin_context(ctx); mutex_unlock(&ctx->mutex); -- cgit v1.2.3 From 0cda4c023132aa93f2dd94811061f812e88daf4c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 15 Jun 2012 14:31:33 +0800 Subject: perf: Introduce perf_pmu_migrate_context() Originally from Peter Zijlstra. The helper migrates perf events from one cpu to another cpu. Signed-off-by: Zheng Yan Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1339741902-8449-5-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ kernel/events/core.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1ce887abcc5c..76c5c8b724a7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1107,6 +1107,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, struct task_struct *task, perf_overflow_handler_t callback, void *context); +extern void perf_pmu_migrate_context(struct pmu *pmu, + int src_cpu, int dst_cpu); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); diff --git a/kernel/events/core.c b/kernel/events/core.c index fa36a39e8bb7..f1cf0edeb39a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1645,6 +1645,8 @@ perf_install_in_context(struct perf_event_context *ctx, lockdep_assert_held(&ctx->mutex); event->ctx = ctx; + if (event->cpu != -1) + event->cpu = cpu; if (!task) { /* @@ -6379,6 +6381,7 @@ SYSCALL_DEFINE5(perf_event_open, mutex_lock(&ctx->mutex); if (move_group) { + synchronize_rcu(); perf_install_in_context(ctx, group_leader, event->cpu); get_ctx(ctx); list_for_each_entry(sibling, &group_leader->sibling_list, @@ -6484,6 +6487,39 @@ err: } EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); +void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) +{ + struct perf_event_context *src_ctx; + struct perf_event_context *dst_ctx; + struct perf_event *event, *tmp; + LIST_HEAD(events); + + src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; + dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; + + mutex_lock(&src_ctx->mutex); + list_for_each_entry_safe(event, tmp, &src_ctx->event_list, + event_entry) { + perf_remove_from_context(event); + put_ctx(src_ctx); + list_add(&event->event_entry, &events); + } + mutex_unlock(&src_ctx->mutex); + + synchronize_rcu(); + + mutex_lock(&dst_ctx->mutex); + list_for_each_entry_safe(event, tmp, &events, event_entry) { + list_del(&event->event_entry); + if (event->state >= PERF_EVENT_STATE_OFF) + event->state = PERF_EVENT_STATE_INACTIVE; + perf_install_in_context(dst_ctx, event, dst_cpu); + get_ctx(dst_ctx); + } + mutex_unlock(&dst_ctx->mutex); +} +EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); + static void sync_child_event(struct perf_event *child_event, struct task_struct *child) { -- cgit v1.2.3 From 087bfbb032691262f2f7d52b910652450c5554b8 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 15 Jun 2012 14:31:34 +0800 Subject: perf/x86: Add generic Intel uncore PMU support This patch adds the generic Intel uncore PMU support, including helper functions that add/delete uncore events, a hrtimer that periodically polls the counters to avoid overflow and code that places all events for a particular socket onto a single cpu. The code design is based on the structure of Sandy Bridge-EP's uncore subsystem, which consists of a variety of components, each component contains one or more "boxes". (Tooling support follows in the next patches.) Signed-off-by: Zheng Yan Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1339741902-8449-6-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 4 +- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 878 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/perf_event_intel_uncore.h | 204 ++++++ 3 files changed, 1085 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/perf_event_intel_uncore.c create mode 100644 arch/x86/kernel/cpu/perf_event_intel_uncore.h diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 6ab6aa2fdfdd..bac4c3804cc7 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -32,7 +32,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifdef CONFIG_PERF_EVENTS obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o endif obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c new file mode 100644 index 000000000000..fe76a07dfdbc --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -0,0 +1,878 @@ +#include "perf_event_intel_uncore.h" + +static struct intel_uncore_type *empty_uncore[] = { NULL, }; +static struct intel_uncore_type **msr_uncores = empty_uncore; + +/* mask of cpus that collect uncore events */ +static cpumask_t uncore_cpu_mask; + +/* constraint for the fixed counter */ +static struct event_constraint constraint_fixed = + EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); + +static void uncore_assign_hw_event(struct intel_uncore_box *box, + struct perf_event *event, int idx) +{ + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = idx; + hwc->last_tag = ++box->tags[idx]; + + if (hwc->idx == UNCORE_PMC_IDX_FIXED) { + hwc->event_base = uncore_msr_fixed_ctr(box); + hwc->config_base = uncore_msr_fixed_ctl(box); + return; + } + + hwc->config_base = uncore_msr_event_ctl(box, hwc->idx); + hwc->event_base = uncore_msr_perf_ctr(box, hwc->idx); +} + +static void uncore_perf_event_update(struct intel_uncore_box *box, + struct perf_event *event) +{ + u64 prev_count, new_count, delta; + int shift; + + if (event->hw.idx >= UNCORE_PMC_IDX_FIXED) + shift = 64 - uncore_fixed_ctr_bits(box); + else + shift = 64 - uncore_perf_ctr_bits(box); + + /* the hrtimer might modify the previous event value */ +again: + prev_count = local64_read(&event->hw.prev_count); + new_count = uncore_read_counter(box, event); + if (local64_xchg(&event->hw.prev_count, new_count) != prev_count) + goto again; + + delta = (new_count << shift) - (prev_count << shift); + delta >>= shift; + + local64_add(delta, &event->count); +} + +/* + * The overflow interrupt is unavailable for SandyBridge-EP, is broken + * for SandyBridge. So we use hrtimer to periodically poll the counter + * to avoid overflow. + */ +static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) +{ + struct intel_uncore_box *box; + unsigned long flags; + int bit; + + box = container_of(hrtimer, struct intel_uncore_box, hrtimer); + if (!box->n_active || box->cpu != smp_processor_id()) + return HRTIMER_NORESTART; + /* + * disable local interrupt to prevent uncore_pmu_event_start/stop + * to interrupt the update process + */ + local_irq_save(flags); + + for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) + uncore_perf_event_update(box, box->events[bit]); + + local_irq_restore(flags); + + hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL)); + return HRTIMER_RESTART; +} + +static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) +{ + __hrtimer_start_range_ns(&box->hrtimer, + ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0, + HRTIMER_MODE_REL_PINNED, 0); +} + +static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) +{ + hrtimer_cancel(&box->hrtimer); +} + +static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) +{ + hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + box->hrtimer.function = uncore_pmu_hrtimer; +} + +struct intel_uncore_box *uncore_alloc_box(int cpu) +{ + struct intel_uncore_box *box; + + box = kmalloc_node(sizeof(*box), GFP_KERNEL | __GFP_ZERO, + cpu_to_node(cpu)); + if (!box) + return NULL; + + uncore_pmu_init_hrtimer(box); + atomic_set(&box->refcnt, 1); + box->cpu = -1; + box->phys_id = -1; + + return box; +} + +static struct intel_uncore_box * +uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) +{ + return *per_cpu_ptr(pmu->box, cpu); +} + +static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct intel_uncore_pmu, pmu); +} + +static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) +{ + /* + * perf core schedules event on the basis of cpu, uncore events are + * collected by one of the cpus inside a physical package. + */ + return uncore_pmu_to_box(uncore_event_to_pmu(event), + smp_processor_id()); +} + +static int uncore_collect_events(struct intel_uncore_box *box, + struct perf_event *leader, bool dogrp) +{ + struct perf_event *event; + int n, max_count; + + max_count = box->pmu->type->num_counters; + if (box->pmu->type->fixed_ctl) + max_count++; + + if (box->n_events >= max_count) + return -EINVAL; + + n = box->n_events; + box->event_list[n] = leader; + n++; + if (!dogrp) + return n; + + list_for_each_entry(event, &leader->sibling_list, group_entry) { + if (event->state <= PERF_EVENT_STATE_OFF) + continue; + + if (n >= max_count) + return -EINVAL; + + box->event_list[n] = event; + n++; + } + return n; +} + +static struct event_constraint * +uncore_event_constraint(struct intel_uncore_type *type, + struct perf_event *event) +{ + struct event_constraint *c; + + if (event->hw.config == ~0ULL) + return &constraint_fixed; + + if (type->constraints) { + for_each_event_constraint(c, type->constraints) { + if ((event->hw.config & c->cmask) == c->code) + return c; + } + } + + return &type->unconstrainted; +} + +static int uncore_assign_events(struct intel_uncore_box *box, + int assign[], int n) +{ + unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; + struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; + int i, ret, wmin, wmax; + struct hw_perf_event *hwc; + + bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); + + for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { + c = uncore_event_constraint(box->pmu->type, + box->event_list[i]); + constraints[i] = c; + wmin = min(wmin, c->weight); + wmax = max(wmax, c->weight); + } + + /* fastpath, try to reuse previous register */ + for (i = 0; i < n; i++) { + hwc = &box->event_list[i]->hw; + c = constraints[i]; + + /* never assigned */ + if (hwc->idx == -1) + break; + + /* constraint still honored */ + if (!test_bit(hwc->idx, c->idxmsk)) + break; + + /* not already used */ + if (test_bit(hwc->idx, used_mask)) + break; + + __set_bit(hwc->idx, used_mask); + assign[i] = hwc->idx; + } + if (i == n) + return 0; + + /* slow path */ + ret = perf_assign_events(constraints, n, wmin, wmax, assign); + return ret ? -EINVAL : 0; +} + +static void uncore_pmu_event_start(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + int idx = event->hw.idx; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) + return; + + event->hw.state = 0; + box->events[idx] = event; + box->n_active++; + __set_bit(idx, box->active_mask); + + local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); + uncore_enable_event(box, event); + + if (box->n_active == 1) { + uncore_enable_box(box); + uncore_pmu_start_hrtimer(box); + } +} + +static void uncore_pmu_event_stop(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + struct hw_perf_event *hwc = &event->hw; + + if (__test_and_clear_bit(hwc->idx, box->active_mask)) { + uncore_disable_event(box, event); + box->n_active--; + box->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if (box->n_active == 0) { + uncore_disable_box(box); + uncore_pmu_cancel_hrtimer(box); + } + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + uncore_perf_event_update(box, event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int uncore_pmu_event_add(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + struct hw_perf_event *hwc = &event->hw; + int assign[UNCORE_PMC_IDX_MAX]; + int i, n, ret; + + if (!box) + return -ENODEV; + + ret = n = uncore_collect_events(box, event, false); + if (ret < 0) + return ret; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + ret = uncore_assign_events(box, assign, n); + if (ret) + return ret; + + /* save events moving to new counters */ + for (i = 0; i < box->n_events; i++) { + event = box->event_list[i]; + hwc = &event->hw; + + if (hwc->idx == assign[i] && + hwc->last_tag == box->tags[assign[i]]) + continue; + /* + * Ensure we don't accidentally enable a stopped + * counter simply because we rescheduled. + */ + if (hwc->state & PERF_HES_STOPPED) + hwc->state |= PERF_HES_ARCH; + + uncore_pmu_event_stop(event, PERF_EF_UPDATE); + } + + /* reprogram moved events into new counters */ + for (i = 0; i < n; i++) { + event = box->event_list[i]; + hwc = &event->hw; + + if (hwc->idx != assign[i] || + hwc->last_tag != box->tags[assign[i]]) + uncore_assign_hw_event(box, event, assign[i]); + else if (i < box->n_events) + continue; + + if (hwc->state & PERF_HES_ARCH) + continue; + + uncore_pmu_event_start(event, 0); + } + box->n_events = n; + + return 0; +} + +static void uncore_pmu_event_del(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + int i; + + uncore_pmu_event_stop(event, PERF_EF_UPDATE); + + for (i = 0; i < box->n_events; i++) { + if (event == box->event_list[i]) { + while (++i < box->n_events) + box->event_list[i - 1] = box->event_list[i]; + + --box->n_events; + break; + } + } + + event->hw.idx = -1; + event->hw.last_tag = ~0ULL; +} + +static void uncore_pmu_event_read(struct perf_event *event) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + uncore_perf_event_update(box, event); +} + +/* + * validation ensures the group can be loaded onto the + * PMU if it was the only group available. + */ +static int uncore_validate_group(struct intel_uncore_pmu *pmu, + struct perf_event *event) +{ + struct perf_event *leader = event->group_leader; + struct intel_uncore_box *fake_box; + int assign[UNCORE_PMC_IDX_MAX]; + int ret = -EINVAL, n; + + fake_box = uncore_alloc_box(smp_processor_id()); + if (!fake_box) + return -ENOMEM; + + fake_box->pmu = pmu; + /* + * the event is not yet connected with its + * siblings therefore we must first collect + * existing siblings, then add the new event + * before we can simulate the scheduling + */ + n = uncore_collect_events(fake_box, leader, true); + if (n < 0) + goto out; + + fake_box->n_events = n; + n = uncore_collect_events(fake_box, event, false); + if (n < 0) + goto out; + + fake_box->n_events = n; + + ret = uncore_assign_events(fake_box, assign, n); +out: + kfree(fake_box); + return ret; +} + +int uncore_pmu_event_init(struct perf_event *event) +{ + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + struct hw_perf_event *hwc = &event->hw; + int ret; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + pmu = uncore_event_to_pmu(event); + /* no device found for this pmu */ + if (pmu->func_id < 0) + return -ENOENT; + + /* + * Uncore PMU does measure at all privilege level all the time. + * So it doesn't make sense to specify any exclude bits. + */ + if (event->attr.exclude_user || event->attr.exclude_kernel || + event->attr.exclude_hv || event->attr.exclude_idle) + return -EINVAL; + + /* Sampling not supported yet */ + if (hwc->sample_period) + return -EINVAL; + + /* + * Place all uncore events for a particular physical package + * onto a single cpu + */ + if (event->cpu < 0) + return -EINVAL; + box = uncore_pmu_to_box(pmu, event->cpu); + if (!box || box->cpu < 0) + return -EINVAL; + event->cpu = box->cpu; + + if (event->attr.config == UNCORE_FIXED_EVENT) { + /* no fixed counter */ + if (!pmu->type->fixed_ctl) + return -EINVAL; + /* + * if there is only one fixed counter, only the first pmu + * can access the fixed counter + */ + if (pmu->type->single_fixed && pmu->pmu_idx > 0) + return -EINVAL; + hwc->config = ~0ULL; + } else { + hwc->config = event->attr.config & pmu->type->event_mask; + } + + event->hw.idx = -1; + event->hw.last_tag = ~0ULL; + + if (event->group_leader != event) + ret = uncore_validate_group(pmu, event); + else + ret = 0; + + return ret; +} + +static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) +{ + int ret; + + pmu->pmu = (struct pmu) { + .attr_groups = pmu->type->attr_groups, + .task_ctx_nr = perf_invalid_context, + .event_init = uncore_pmu_event_init, + .add = uncore_pmu_event_add, + .del = uncore_pmu_event_del, + .start = uncore_pmu_event_start, + .stop = uncore_pmu_event_stop, + .read = uncore_pmu_event_read, + }; + + if (pmu->type->num_boxes == 1) { + if (strlen(pmu->type->name) > 0) + sprintf(pmu->name, "uncore_%s", pmu->type->name); + else + sprintf(pmu->name, "uncore"); + } else { + sprintf(pmu->name, "uncore_%s_%d", pmu->type->name, + pmu->pmu_idx); + } + + ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); + return ret; +} + +static void __init uncore_type_exit(struct intel_uncore_type *type) +{ + int i; + + for (i = 0; i < type->num_boxes; i++) + free_percpu(type->pmus[i].box); + kfree(type->pmus); + type->pmus = NULL; + kfree(type->attr_groups[1]); + type->attr_groups[1] = NULL; +} + +static int __init uncore_type_init(struct intel_uncore_type *type) +{ + struct intel_uncore_pmu *pmus; + struct attribute_group *events_group; + struct attribute **attrs; + int i, j; + + pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL); + if (!pmus) + return -ENOMEM; + + type->unconstrainted = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, + 0, type->num_counters, 0); + + for (i = 0; i < type->num_boxes; i++) { + pmus[i].func_id = -1; + pmus[i].pmu_idx = i; + pmus[i].type = type; + pmus[i].box = alloc_percpu(struct intel_uncore_box *); + if (!pmus[i].box) + goto fail; + } + + if (type->event_descs) { + i = 0; + while (type->event_descs[i].attr.attr.name) + i++; + + events_group = kzalloc(sizeof(struct attribute *) * (i + 1) + + sizeof(*events_group), GFP_KERNEL); + if (!events_group) + goto fail; + + attrs = (struct attribute **)(events_group + 1); + events_group->name = "events"; + events_group->attrs = attrs; + + for (j = 0; j < i; j++) + attrs[j] = &type->event_descs[j].attr.attr; + + type->attr_groups[1] = events_group; + } + + type->pmus = pmus; + return 0; +fail: + uncore_type_exit(type); + return -ENOMEM; +} + +static int __init uncore_types_init(struct intel_uncore_type **types) +{ + int i, ret; + + for (i = 0; types[i]; i++) { + ret = uncore_type_init(types[i]); + if (ret) + goto fail; + } + return 0; +fail: + while (--i >= 0) + uncore_type_exit(types[i]); + return ret; +} + +static void __cpuinit uncore_cpu_dying(int cpu) +{ + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, j; + + for (i = 0; msr_uncores[i]; i++) { + type = msr_uncores[i]; + for (j = 0; j < type->num_boxes; j++) { + pmu = &type->pmus[j]; + box = *per_cpu_ptr(pmu->box, cpu); + *per_cpu_ptr(pmu->box, cpu) = NULL; + if (box && atomic_dec_and_test(&box->refcnt)) + kfree(box); + } + } +} + +static int __cpuinit uncore_cpu_starting(int cpu) +{ + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box, *exist; + int i, j, k, phys_id; + + phys_id = topology_physical_package_id(cpu); + + for (i = 0; msr_uncores[i]; i++) { + type = msr_uncores[i]; + for (j = 0; j < type->num_boxes; j++) { + pmu = &type->pmus[j]; + box = *per_cpu_ptr(pmu->box, cpu); + /* called by uncore_cpu_init? */ + if (box && box->phys_id >= 0) { + uncore_box_init(box); + continue; + } + + for_each_online_cpu(k) { + exist = *per_cpu_ptr(pmu->box, k); + if (exist && exist->phys_id == phys_id) { + atomic_inc(&exist->refcnt); + *per_cpu_ptr(pmu->box, cpu) = exist; + kfree(box); + box = NULL; + break; + } + } + + if (box) { + box->phys_id = phys_id; + uncore_box_init(box); + } + } + } + return 0; +} + +static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id) +{ + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, j; + + for (i = 0; msr_uncores[i]; i++) { + type = msr_uncores[i]; + for (j = 0; j < type->num_boxes; j++) { + pmu = &type->pmus[j]; + if (pmu->func_id < 0) + pmu->func_id = j; + + box = uncore_alloc_box(cpu); + if (!box) + return -ENOMEM; + + box->pmu = pmu; + box->phys_id = phys_id; + *per_cpu_ptr(pmu->box, cpu) = box; + } + } + return 0; +} + +static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores, + int old_cpu, int new_cpu) +{ + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, j; + + for (i = 0; uncores[i]; i++) { + type = uncores[i]; + for (j = 0; j < type->num_boxes; j++) { + pmu = &type->pmus[j]; + if (old_cpu < 0) + box = uncore_pmu_to_box(pmu, new_cpu); + else + box = uncore_pmu_to_box(pmu, old_cpu); + if (!box) + continue; + + if (old_cpu < 0) { + WARN_ON_ONCE(box->cpu != -1); + box->cpu = new_cpu; + continue; + } + + WARN_ON_ONCE(box->cpu != old_cpu); + if (new_cpu >= 0) { + uncore_pmu_cancel_hrtimer(box); + perf_pmu_migrate_context(&pmu->pmu, + old_cpu, new_cpu); + box->cpu = new_cpu; + } else { + box->cpu = -1; + } + } + } +} + +static void __cpuinit uncore_event_exit_cpu(int cpu) +{ + int i, phys_id, target; + + /* if exiting cpu is used for collecting uncore events */ + if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) + return; + + /* find a new cpu to collect uncore events */ + phys_id = topology_physical_package_id(cpu); + target = -1; + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (phys_id == topology_physical_package_id(i)) { + target = i; + break; + } + } + + /* migrate uncore events to the new cpu */ + if (target >= 0) + cpumask_set_cpu(target, &uncore_cpu_mask); + + uncore_change_context(msr_uncores, cpu, target); +} + +static void __cpuinit uncore_event_init_cpu(int cpu) +{ + int i, phys_id; + + phys_id = topology_physical_package_id(cpu); + for_each_cpu(i, &uncore_cpu_mask) { + if (phys_id == topology_physical_package_id(i)) + return; + } + + cpumask_set_cpu(cpu, &uncore_cpu_mask); + + uncore_change_context(msr_uncores, -1, cpu); +} + +static int __cpuinit uncore_cpu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + /* allocate/free data structure for uncore box */ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + uncore_cpu_prepare(cpu, -1); + break; + case CPU_STARTING: + uncore_cpu_starting(cpu); + break; + case CPU_UP_CANCELED: + case CPU_DYING: + uncore_cpu_dying(cpu); + break; + default: + break; + } + + /* select the cpu that collects uncore events */ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_FAILED: + case CPU_STARTING: + uncore_event_init_cpu(cpu); + break; + case CPU_DOWN_PREPARE: + uncore_event_exit_cpu(cpu); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block uncore_cpu_nb __cpuinitdata = { + .notifier_call = uncore_cpu_notifier, + /* + * to migrate uncore events, our notifier should be executed + * before perf core's notifier. + */ + .priority = CPU_PRI_PERF + 1, +}; + +static void __init uncore_cpu_setup(void *dummy) +{ + uncore_cpu_starting(smp_processor_id()); +} + +static int __init uncore_cpu_init(void) +{ + int ret, cpu; + + switch (boot_cpu_data.x86_model) { + default: + return 0; + } + + ret = uncore_types_init(msr_uncores); + if (ret) + return ret; + + get_online_cpus(); + + for_each_online_cpu(cpu) { + int i, phys_id = topology_physical_package_id(cpu); + + for_each_cpu(i, &uncore_cpu_mask) { + if (phys_id == topology_physical_package_id(i)) { + phys_id = -1; + break; + } + } + if (phys_id < 0) + continue; + + uncore_cpu_prepare(cpu, phys_id); + uncore_event_init_cpu(cpu); + } + on_each_cpu(uncore_cpu_setup, NULL, 1); + + register_cpu_notifier(&uncore_cpu_nb); + + put_online_cpus(); + + return 0; +} + +static int __init uncore_pmus_register(void) +{ + struct intel_uncore_pmu *pmu; + struct intel_uncore_type *type; + int i, j; + + for (i = 0; msr_uncores[i]; i++) { + type = msr_uncores[i]; + for (j = 0; j < type->num_boxes; j++) { + pmu = &type->pmus[j]; + uncore_pmu_register(pmu); + } + } + + return 0; +} + +static int __init intel_uncore_init(void) +{ + int ret; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return -ENODEV; + + ret = uncore_cpu_init(); + if (ret) + goto fail; + + uncore_pmus_register(); + return 0; +fail: + return ret; +} +device_initcall(intel_uncore_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h new file mode 100644 index 000000000000..49a6bfbba0de --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -0,0 +1,204 @@ +#include +#include +#include +#include "perf_event.h" + +#define UNCORE_PMU_NAME_LEN 32 +#define UNCORE_BOX_HASH_SIZE 8 + +#define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC) + +#define UNCORE_FIXED_EVENT 0xffff +#define UNCORE_PMC_IDX_MAX_GENERIC 8 +#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC +#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) + +#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) + +struct intel_uncore_ops; +struct intel_uncore_pmu; +struct intel_uncore_box; +struct uncore_event_desc; + +struct intel_uncore_type { + const char *name; + int num_counters; + int num_boxes; + int perf_ctr_bits; + int fixed_ctr_bits; + int single_fixed; + unsigned perf_ctr; + unsigned event_ctl; + unsigned event_mask; + unsigned fixed_ctr; + unsigned fixed_ctl; + unsigned box_ctl; + unsigned msr_offset; + struct event_constraint unconstrainted; + struct event_constraint *constraints; + struct intel_uncore_pmu *pmus; + struct intel_uncore_ops *ops; + struct uncore_event_desc *event_descs; + const struct attribute_group *attr_groups[3]; +}; + +#define format_group attr_groups[0] + +struct intel_uncore_ops { + void (*init_box)(struct intel_uncore_box *); + void (*disable_box)(struct intel_uncore_box *); + void (*enable_box)(struct intel_uncore_box *); + void (*disable_event)(struct intel_uncore_box *, struct perf_event *); + void (*enable_event)(struct intel_uncore_box *, struct perf_event *); + u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *); +}; + +struct intel_uncore_pmu { + struct pmu pmu; + char name[UNCORE_PMU_NAME_LEN]; + int pmu_idx; + int func_id; + struct intel_uncore_type *type; + struct intel_uncore_box ** __percpu box; +}; + +struct intel_uncore_box { + int phys_id; + int n_active; /* number of active events */ + int n_events; + int cpu; /* cpu to collect events */ + unsigned long flags; + atomic_t refcnt; + struct perf_event *events[UNCORE_PMC_IDX_MAX]; + struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; + unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; + u64 tags[UNCORE_PMC_IDX_MAX]; + struct intel_uncore_pmu *pmu; + struct hrtimer hrtimer; + struct list_head list; +}; + +#define UNCORE_BOX_FLAG_INITIATED 0 + +struct uncore_event_desc { + struct kobj_attribute attr; + const char *config; +}; + +#define INTEL_UNCORE_EVENT_DESC(_name, _config) \ +{ \ + .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \ + .config = _config, \ +} + +#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct kobj_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __uncore_##_var##_show, NULL) + + +static ssize_t uncore_event_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct uncore_event_desc *event = + container_of(attr, struct uncore_event_desc, attr); + return sprintf(buf, "%s", event->config); +} + +static inline +unsigned uncore_msr_box_ctl(struct intel_uncore_box *box) +{ + if (!box->pmu->type->box_ctl) + return 0; + return box->pmu->type->box_ctl + + box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box) +{ + if (!box->pmu->type->fixed_ctl) + return 0; + return box->pmu->type->fixed_ctl + + box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box) +{ + return box->pmu->type->fixed_ctr + + box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) +{ + return idx + box->pmu->type->event_ctl + + box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) +{ + return idx + box->pmu->type->perf_ctr + + box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box) +{ + return box->pmu->type->perf_ctr_bits; +} + +static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box) +{ + return box->pmu->type->fixed_ctr_bits; +} + +static inline int uncore_num_counters(struct intel_uncore_box *box) +{ + return box->pmu->type->num_counters; +} + +static inline void uncore_disable_box(struct intel_uncore_box *box) +{ + if (box->pmu->type->ops->disable_box) + box->pmu->type->ops->disable_box(box); +} + +static inline void uncore_enable_box(struct intel_uncore_box *box) +{ + if (box->pmu->type->ops->enable_box) + box->pmu->type->ops->enable_box(box); +} + +static inline void uncore_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + box->pmu->type->ops->disable_event(box, event); +} + +static inline void uncore_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + box->pmu->type->ops->enable_event(box, event); +} + +static inline u64 uncore_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + return box->pmu->type->ops->read_counter(box, event); +} + +static inline void uncore_box_init(struct intel_uncore_box *box) +{ + if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { + if (box->pmu->type->ops->init_box) + box->pmu->type->ops->init_box(box); + } +} -- cgit v1.2.3 From fcde10e916326545e8fec1807357c68ef08dc443 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 15 Jun 2012 14:31:35 +0800 Subject: perf/x86: Add Intel Nehalem and Sandy Bridge uncore PMU support Signed-off-by: Zheng Yan Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1339741902-8449-7-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 195 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/perf_event_intel_uncore.h | 50 +++++++ 2 files changed, 245 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index fe76a07dfdbc..3ed941ac3745 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -10,6 +10,192 @@ static cpumask_t uncore_cpu_mask; static struct event_constraint constraint_fixed = EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); + +/* Sandy Bridge uncore support */ +static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx < UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + else + wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); +} + +static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + wrmsrl(event->hw.config_base, 0); +} + +static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + u64 count; + rdmsrl(event->hw.event_base, count); + return count; +} + +static void snb_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); + } +} + +static struct attribute *snb_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask5.attr, + NULL, +}; + +static struct attribute_group snb_uncore_format_group = { + .name = "format", + .attrs = snb_uncore_formats_attr, +}; + +static struct intel_uncore_ops snb_uncore_msr_ops = { + .init_box = snb_uncore_msr_init_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = snb_uncore_msr_read_counter, +}; + +static struct event_constraint snb_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x80, 0x1), + UNCORE_EVENT_CONSTRAINT(0x83, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snb_uncore_cbox = { + .name = "cbox", + .num_counters = 2, + .num_boxes = 4, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .constraints = snb_uncore_cbox_constraints, + .ops = &snb_uncore_msr_ops, + .format_group = &snb_uncore_format_group, +}; + +static struct intel_uncore_type *snb_msr_uncores[] = { + &snb_uncore_cbox, + NULL, +}; +/* end of Sandy Bridge uncore support */ + +/* Nehalem uncore support */ +static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); +} + +static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, + NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); +} + +static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx < UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + else + wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); +} + +static struct attribute *nhm_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask8.attr, + NULL, +}; + +static struct attribute_group nhm_uncore_format_group = { + .name = "format", + .attrs = nhm_uncore_formats_attr, +}; + +static struct uncore_event_desc nhm_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "config=0xffff"), + /* full cache line writes to DRAM */ + INTEL_UNCORE_EVENT_DESC(QMC_WRITES_FULL_ANY, "event=0x2f,umask=0xf"), + /* Quickpath Memory Controller normal priority read requests */ + INTEL_UNCORE_EVENT_DESC(QMC_NORMAL_READS_ANY, "event=0x2c,umask=0xf"), + /* Quickpath Home Logic read requests from the IOH */ + INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_READS, + "event=0x20,umask=0x1"), + /* Quickpath Home Logic write requests from the IOH */ + INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_WRITES, + "event=0x20,umask=0x2"), + /* Quickpath Home Logic read requests from a remote socket */ + INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_READS, + "event=0x20,umask=0x4"), + /* Quickpath Home Logic write requests from a remote socket */ + INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_WRITES, + "event=0x20,umask=0x8"), + /* Quickpath Home Logic read requests from the local socket */ + INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_READS, + "event=0x20,umask=0x10"), + /* Quickpath Home Logic write requests from the local socket */ + INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_WRITES, + "event=0x20,umask=0x20"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhm_uncore_msr_ops = { + .disable_box = nhm_uncore_msr_disable_box, + .enable_box = nhm_uncore_msr_enable_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = nhm_uncore_msr_enable_event, + .read_counter = snb_uncore_msr_read_counter, +}; + +static struct intel_uncore_type nhm_uncore = { + .name = "", + .num_counters = 8, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .event_ctl = NHM_UNC_PERFEVTSEL0, + .perf_ctr = NHM_UNC_UNCORE_PMC0, + .fixed_ctr = NHM_UNC_FIXED_CTR, + .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL, + .event_mask = NHM_UNC_RAW_EVENT_MASK, + .event_descs = nhm_uncore_events, + .ops = &nhm_uncore_msr_ops, + .format_group = &nhm_uncore_format_group, +}; + +static struct intel_uncore_type *nhm_msr_uncores[] = { + &nhm_uncore, + NULL, +}; +/* end of Nehalem uncore support */ + static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx) { @@ -808,6 +994,15 @@ static int __init uncore_cpu_init(void) int ret, cpu; switch (boot_cpu_data.x86_model) { + case 26: /* Nehalem */ + case 30: + case 37: /* Westmere */ + case 44: + msr_uncores = nhm_msr_uncores; + break; + case 42: /* Sandy Bridge */ + msr_uncores = snb_msr_uncores; + break; default: return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 49a6bfbba0de..eeb5ca5815a8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -15,6 +15,56 @@ #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) +/* SNB event control */ +#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff +#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 +#define SNB_UNC_CTL_EDGE_DET (1 << 18) +#define SNB_UNC_CTL_EN (1 << 22) +#define SNB_UNC_CTL_INVERT (1 << 23) +#define SNB_UNC_CTL_CMASK_MASK 0x1f000000 +#define NHM_UNC_CTL_CMASK_MASK 0xff000000 +#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0) + +#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + SNB_UNC_CTL_CMASK_MASK) + +#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + NHM_UNC_CTL_CMASK_MASK) + +/* SNB global control register */ +#define SNB_UNC_PERF_GLOBAL_CTL 0x391 +#define SNB_UNC_FIXED_CTR_CTRL 0x394 +#define SNB_UNC_FIXED_CTR 0x395 + +/* SNB uncore global control */ +#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1) +#define SNB_UNC_GLOBAL_CTL_EN (1 << 29) + +/* SNB Cbo register */ +#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700 +#define SNB_UNC_CBO_0_PER_CTR0 0x706 +#define SNB_UNC_CBO_MSR_OFFSET 0x10 + +/* NHM global control register */ +#define NHM_UNC_PERF_GLOBAL_CTL 0x391 +#define NHM_UNC_FIXED_CTR 0x394 +#define NHM_UNC_FIXED_CTR_CTRL 0x395 + +/* NHM uncore global control */ +#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1) +#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32) + +/* NHM uncore register */ +#define NHM_UNC_PERFEVTSEL0 0x3c0 +#define NHM_UNC_UNCORE_PMC0 0x3b0 + + struct intel_uncore_ops; struct intel_uncore_pmu; struct intel_uncore_box; -- cgit v1.2.3 From 14371cce03c2fc393997e17f979e76674b7f392a Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 15 Jun 2012 14:31:36 +0800 Subject: perf: Add generic PCI uncore PMU device support This patch adds generic support for uncore PMUs presented as PCI devices. (These come in addition to the CPU/MSR based uncores.) Signed-off-by: Zheng Yan Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1339741902-8449-8-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 175 +++++++++++++++++++++++++- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 66 ++++++++++ 2 files changed, 236 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 3ed941ac3745..e20c65a0e108 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2,6 +2,11 @@ static struct intel_uncore_type *empty_uncore[] = { NULL, }; static struct intel_uncore_type **msr_uncores = empty_uncore; +static struct intel_uncore_type **pci_uncores = empty_uncore; +/* pci bus to socket mapping */ +static int pcibus_to_physid[256] = { [0 ... 255] = -1, }; + +static DEFINE_RAW_SPINLOCK(uncore_box_lock); /* mask of cpus that collect uncore events */ static cpumask_t uncore_cpu_mask; @@ -205,13 +210,13 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box, hwc->last_tag = ++box->tags[idx]; if (hwc->idx == UNCORE_PMC_IDX_FIXED) { - hwc->event_base = uncore_msr_fixed_ctr(box); - hwc->config_base = uncore_msr_fixed_ctl(box); + hwc->event_base = uncore_fixed_ctr(box); + hwc->config_base = uncore_fixed_ctl(box); return; } - hwc->config_base = uncore_msr_event_ctl(box, hwc->idx); - hwc->event_base = uncore_msr_perf_ctr(box, hwc->idx); + hwc->config_base = uncore_event_ctl(box, hwc->idx); + hwc->event_base = uncore_perf_ctr(box, hwc->idx); } static void uncore_perf_event_update(struct intel_uncore_box *box, @@ -305,6 +310,22 @@ struct intel_uncore_box *uncore_alloc_box(int cpu) static struct intel_uncore_box * uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { + static struct intel_uncore_box *box; + + box = *per_cpu_ptr(pmu->box, cpu); + if (box) + return box; + + raw_spin_lock(&uncore_box_lock); + list_for_each_entry(box, &pmu->box_list, list) { + if (box->phys_id == topology_physical_package_id(cpu)) { + atomic_inc(&box->refcnt); + *per_cpu_ptr(pmu->box, cpu) = box; + break; + } + } + raw_spin_unlock(&uncore_box_lock); + return *per_cpu_ptr(pmu->box, cpu); } @@ -706,6 +727,13 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) type->attr_groups[1] = NULL; } +static void uncore_types_exit(struct intel_uncore_type **types) +{ + int i; + for (i = 0; types[i]; i++) + uncore_type_exit(types[i]); +} + static int __init uncore_type_init(struct intel_uncore_type *type) { struct intel_uncore_pmu *pmus; @@ -725,6 +753,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type) pmus[i].func_id = -1; pmus[i].pmu_idx = i; pmus[i].type = type; + INIT_LIST_HEAD(&pmus[i].box_list); pmus[i].box = alloc_percpu(struct intel_uncore_box *); if (!pmus[i].box) goto fail; @@ -773,6 +802,127 @@ fail: return ret; } +static struct pci_driver *uncore_pci_driver; +static bool pcidrv_registered; + +/* + * add a pci uncore device + */ +static int __devinit uncore_pci_add(struct intel_uncore_type *type, + struct pci_dev *pdev) +{ + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, phys_id; + + phys_id = pcibus_to_physid[pdev->bus->number]; + if (phys_id < 0) + return -ENODEV; + + box = uncore_alloc_box(0); + if (!box) + return -ENOMEM; + + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. + */ + for (i = 0; i < type->num_boxes; i++) { + pmu = &type->pmus[i]; + if (pmu->func_id == pdev->devfn) + break; + if (pmu->func_id < 0) { + pmu->func_id = pdev->devfn; + break; + } + pmu = NULL; + } + + if (!pmu) { + kfree(box); + return -EINVAL; + } + + box->phys_id = phys_id; + box->pci_dev = pdev; + box->pmu = pmu; + uncore_box_init(box); + pci_set_drvdata(pdev, box); + + raw_spin_lock(&uncore_box_lock); + list_add_tail(&box->list, &pmu->box_list); + raw_spin_unlock(&uncore_box_lock); + + return 0; +} + +static void __devexit uncore_pci_remove(struct pci_dev *pdev) +{ + struct intel_uncore_box *box = pci_get_drvdata(pdev); + struct intel_uncore_pmu *pmu = box->pmu; + int cpu, phys_id = pcibus_to_physid[pdev->bus->number]; + + if (WARN_ON_ONCE(phys_id != box->phys_id)) + return; + + raw_spin_lock(&uncore_box_lock); + list_del(&box->list); + raw_spin_unlock(&uncore_box_lock); + + for_each_possible_cpu(cpu) { + if (*per_cpu_ptr(pmu->box, cpu) == box) { + *per_cpu_ptr(pmu->box, cpu) = NULL; + atomic_dec(&box->refcnt); + } + } + + WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); + kfree(box); +} + +static int __devinit uncore_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct intel_uncore_type *type; + + type = (struct intel_uncore_type *)id->driver_data; + return uncore_pci_add(type, pdev); +} + +static int __init uncore_pci_init(void) +{ + int ret; + + switch (boot_cpu_data.x86_model) { + default: + return 0; + } + + ret = uncore_types_init(pci_uncores); + if (ret) + return ret; + + uncore_pci_driver->probe = uncore_pci_probe; + uncore_pci_driver->remove = uncore_pci_remove; + + ret = pci_register_driver(uncore_pci_driver); + if (ret == 0) + pcidrv_registered = true; + else + uncore_types_exit(pci_uncores); + + return ret; +} + +static void __init uncore_pci_exit(void) +{ + if (pcidrv_registered) { + pcidrv_registered = false; + pci_unregister_driver(uncore_pci_driver); + uncore_types_exit(pci_uncores); + } +} + static void __cpuinit uncore_cpu_dying(int cpu) { struct intel_uncore_type *type; @@ -921,6 +1071,7 @@ static void __cpuinit uncore_event_exit_cpu(int cpu) cpumask_set_cpu(target, &uncore_cpu_mask); uncore_change_context(msr_uncores, cpu, target); + uncore_change_context(pci_uncores, cpu, target); } static void __cpuinit uncore_event_init_cpu(int cpu) @@ -936,6 +1087,7 @@ static void __cpuinit uncore_event_init_cpu(int cpu) cpumask_set_cpu(cpu, &uncore_cpu_mask); uncore_change_context(msr_uncores, -1, cpu); + uncore_change_context(pci_uncores, -1, cpu); } static int __cpuinit uncore_cpu_notifier(struct notifier_block *self, @@ -1051,6 +1203,14 @@ static int __init uncore_pmus_register(void) } } + for (i = 0; pci_uncores[i]; i++) { + type = pci_uncores[i]; + for (j = 0; j < type->num_boxes; j++) { + pmu = &type->pmus[j]; + uncore_pmu_register(pmu); + } + } + return 0; } @@ -1061,9 +1221,14 @@ static int __init intel_uncore_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; - ret = uncore_cpu_init(); + ret = uncore_pci_init(); if (ret) goto fail; + ret = uncore_cpu_init(); + if (ret) { + uncore_pci_exit(); + goto fail; + } uncore_pmus_register(); return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index eeb5ca5815a8..aa01df87b8de 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -1,5 +1,6 @@ #include #include +#include #include #include "perf_event.h" @@ -110,6 +111,7 @@ struct intel_uncore_pmu { int func_id; struct intel_uncore_type *type; struct intel_uncore_box ** __percpu box; + struct list_head box_list; }; struct intel_uncore_box { @@ -123,6 +125,7 @@ struct intel_uncore_box { struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; u64 tags[UNCORE_PMC_IDX_MAX]; + struct pci_dev *pci_dev; struct intel_uncore_pmu *pmu; struct hrtimer hrtimer; struct list_head list; @@ -161,6 +164,33 @@ static ssize_t uncore_event_show(struct kobject *kobj, return sprintf(buf, "%s", event->config); } +static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) +{ + return box->pmu->type->box_ctl; +} + +static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box) +{ + return box->pmu->type->fixed_ctl; +} + +static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box) +{ + return box->pmu->type->fixed_ctr; +} + +static inline +unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx) +{ + return idx * 4 + box->pmu->type->event_ctl; +} + +static inline +unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx) +{ + return idx * 8 + box->pmu->type->perf_ctr; +} + static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box) { @@ -200,6 +230,42 @@ unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) box->pmu->type->msr_offset * box->pmu->pmu_idx; } +static inline +unsigned uncore_fixed_ctl(struct intel_uncore_box *box) +{ + if (box->pci_dev) + return uncore_pci_fixed_ctl(box); + else + return uncore_msr_fixed_ctl(box); +} + +static inline +unsigned uncore_fixed_ctr(struct intel_uncore_box *box) +{ + if (box->pci_dev) + return uncore_pci_fixed_ctr(box); + else + return uncore_msr_fixed_ctr(box); +} + +static inline +unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) +{ + if (box->pci_dev) + return uncore_pci_event_ctl(box, idx); + else + return uncore_msr_event_ctl(box, idx); +} + +static inline +unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx) +{ + if (box->pci_dev) + return uncore_pci_perf_ctr(box, idx); + else + return uncore_msr_perf_ctr(box, idx); +} + static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box) { return box->pmu->type->perf_ctr_bits; -- cgit v1.2.3 From 7c94ee2e0917b2ea56498bff939c8aa55da27207 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 15 Jun 2012 14:31:37 +0800 Subject: perf/x86: Add Intel Nehalem and Sandy Bridge-EP uncore support The uncore subsystem in Sandy Bridge-EP consists of 8 components: Ubox, Cacheing Agent, Home Agent, Memory controller, Power Control, QPI Link Layer, R2PCIe, R3QPI. Signed-off-by: Zheng Yan Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1339741902-8449-9-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 484 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/perf_event_intel_uncore.h | 86 +++++ include/linux/pci_ids.h | 11 + 3 files changed, 581 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index e20c65a0e108..d34f68bf990b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -21,6 +21,482 @@ DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); +DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); +DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); + +/* Sandy Bridge-EP uncore support */ +static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + u32 config; + + pci_read_config_dword(pdev, box_ctl, &config); + config |= SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); +} + +static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + u32 config; + + pci_read_config_dword(pdev, box_ctl, &config); + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); +} + +static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config | + SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, hwc->config_base, hwc->config); +} + +static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count; + + pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); + pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); + return count; +} + +static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, + SNBEP_PMON_BOX_CTL_INT); +} + +static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + u64 config; + unsigned msr; + + msr = uncore_msr_box_ctl(box); + if (msr) { + rdmsrl(msr, config); + config |= SNBEP_PMON_BOX_CTL_FRZ; + wrmsrl(msr, config); + return; + } +} + +static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + u64 config; + unsigned msr; + + msr = uncore_msr_box_ctl(box); + if (msr) { + rdmsrl(msr, config); + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + wrmsrl(msr, config); + return; + } +} + +static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); +} + +static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 count; + + rdmsrl(hwc->event_base, count); + return count; +} + +static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + if (msr) + wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); +} + +static struct attribute *snbep_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute *snbep_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + NULL, +}; + +static struct attribute *snbep_uncore_pcu_formats_attr[] = { + &format_attr_event.attr, + &format_attr_occ_sel.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh5.attr, + &format_attr_occ_invert.attr, + &format_attr_occ_edge.attr, + NULL, +}; + +static struct uncore_event_desc snbep_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "config=0xffff"), + /* read */ + INTEL_UNCORE_EVENT_DESC(CAS_COUNT_RD, "event=0x4,umask=0x3"), + /* write */ + INTEL_UNCORE_EVENT_DESC(CAS_COUNT_WR, "event=0x4,umask=0xc"), + { /* end: all zeroes */ }, +}; + +static struct uncore_event_desc snbep_uncore_qpi_events[] = { + INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "event=0x14"), + /* outgoing data+nondata flits */ + INTEL_UNCORE_EVENT_DESC(TxL_FLITS_ACTIVE, "event=0x0,umask=0x6"), + /* DRS data received */ + INTEL_UNCORE_EVENT_DESC(DRS_DATA, "event=0x2,umask=0x8"), + /* NCB data received */ + INTEL_UNCORE_EVENT_DESC(NCB_DATA, "event=0x3,umask=0x4"), + { /* end: all zeroes */ }, +}; + +static struct attribute_group snbep_uncore_format_group = { + .name = "format", + .attrs = snbep_uncore_formats_attr, +}; + +static struct attribute_group snbep_uncore_ubox_format_group = { + .name = "format", + .attrs = snbep_uncore_ubox_formats_attr, +}; + +static struct attribute_group snbep_uncore_pcu_format_group = { + .name = "format", + .attrs = snbep_uncore_pcu_formats_attr, +}; + +static struct intel_uncore_ops snbep_uncore_msr_ops = { + .init_box = snbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = snbep_uncore_msr_enable_event, + .read_counter = snbep_uncore_msr_read_counter, +}; + +static struct intel_uncore_ops snbep_uncore_pci_ops = { + .init_box = snbep_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct event_constraint snbep_uncore_cbox_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x01, 0x1), + UNCORE_EVENT_CONSTRAINT(0x02, 0x3), + UNCORE_EVENT_CONSTRAINT(0x04, 0x3), + UNCORE_EVENT_CONSTRAINT(0x05, 0x3), + UNCORE_EVENT_CONSTRAINT(0x07, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x1), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x3), + UNCORE_EVENT_CONSTRAINT(0x1b, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), + UNCORE_EVENT_CONSTRAINT(0x1f, 0xe), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x35, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x1), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + UNCORE_EVENT_CONSTRAINT(0x38, 0x3), + UNCORE_EVENT_CONSTRAINT(0x39, 0x3), + UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), + EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x1), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x24, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r3qpi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x10, 0x3), + UNCORE_EVENT_CONSTRAINT(0x11, 0x3), + UNCORE_EVENT_CONSTRAINT(0x12, 0x3), + UNCORE_EVENT_CONSTRAINT(0x13, 0x1), + UNCORE_EVENT_CONSTRAINT(0x20, 0x3), + UNCORE_EVENT_CONSTRAINT(0x21, 0x3), + UNCORE_EVENT_CONSTRAINT(0x22, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x24, 0x3), + UNCORE_EVENT_CONSTRAINT(0x25, 0x3), + UNCORE_EVENT_CONSTRAINT(0x26, 0x3), + UNCORE_EVENT_CONSTRAINT(0x30, 0x3), + UNCORE_EVENT_CONSTRAINT(0x31, 0x3), + UNCORE_EVENT_CONSTRAINT(0x32, 0x3), + UNCORE_EVENT_CONSTRAINT(0x33, 0x3), + UNCORE_EVENT_CONSTRAINT(0x34, 0x3), + UNCORE_EVENT_CONSTRAINT(0x36, 0x3), + UNCORE_EVENT_CONSTRAINT(0x37, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snbep_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNBEP_U_MSR_PMON_CTR0, + .event_ctl = SNBEP_U_MSR_PMON_CTL0, + .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &snbep_uncore_msr_ops, + .format_group = &snbep_uncore_ubox_format_group, +}; + +static struct intel_uncore_type snbep_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 44, + .event_ctl = SNBEP_C0_MSR_PMON_CTL0, + .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = SNBEP_CBO_MSR_OFFSET, + .constraints = snbep_uncore_cbox_constraints, + .ops = &snbep_uncore_msr_ops, + .format_group = &snbep_uncore_format_group, +}; + +static struct intel_uncore_type snbep_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, + .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, + .ops = &snbep_uncore_msr_ops, + .format_group = &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *snbep_msr_uncores[] = { + &snbep_uncore_ubox, + &snbep_uncore_cbox, + &snbep_uncore_pcu, + NULL, +}; + +#define SNBEP_UNCORE_PCI_COMMON_INIT() \ + .perf_ctr = SNBEP_PCI_PMON_CTR0, \ + .event_ctl = SNBEP_PCI_PMON_CTL0, \ + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ + .ops = &snbep_uncore_pci_ops, \ + .format_group = &snbep_uncore_format_group + +static struct intel_uncore_type snbep_uncore_ha = { + .name = "ha", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, + .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, + .event_descs = snbep_uncore_imc_events, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_qpi = { + .name = "qpi", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_descs = snbep_uncore_qpi_events, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + + +static struct intel_uncore_type snbep_uncore_r2pcie = { + .name = "r2pcie", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r2pcie_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_r3qpi = { + .name = "r3qpi", + .num_counters = 3, + .num_boxes = 2, + .perf_ctr_bits = 44, + .constraints = snbep_uncore_r3qpi_constraints, + SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type *snbep_pci_uncores[] = { + &snbep_uncore_ha, + &snbep_uncore_imc, + &snbep_uncore_qpi, + &snbep_uncore_r2pcie, + &snbep_uncore_r3qpi, + NULL, +}; + +static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { + { /* Home Agent */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), + .driver_data = (unsigned long)&snbep_uncore_ha, + }, + { /* MC Channel 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), + .driver_data = (unsigned long)&snbep_uncore_imc, + }, + { /* MC Channel 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), + .driver_data = (unsigned long)&snbep_uncore_imc, + }, + { /* MC Channel 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), + .driver_data = (unsigned long)&snbep_uncore_imc, + }, + { /* MC Channel 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), + .driver_data = (unsigned long)&snbep_uncore_imc, + }, + { /* QPI Port 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), + .driver_data = (unsigned long)&snbep_uncore_qpi, + }, + { /* QPI Port 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), + .driver_data = (unsigned long)&snbep_uncore_qpi, + }, + { /* P2PCIe */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), + .driver_data = (unsigned long)&snbep_uncore_r2pcie, + }, + { /* R3QPI Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), + .driver_data = (unsigned long)&snbep_uncore_r3qpi, + }, + { /* R3QPI Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), + .driver_data = (unsigned long)&snbep_uncore_r3qpi, + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver snbep_uncore_pci_driver = { + .name = "snbep_uncore", + .id_table = snbep_uncore_pci_ids, +}; + +/* + * build pci bus to socket mapping + */ +static void snbep_pci2phy_map_init(void) +{ + struct pci_dev *ubox_dev = NULL; + int i, bus, nodeid; + u32 config; + + while (1) { + /* find the UBOX device */ + ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX, + ubox_dev); + if (!ubox_dev) + break; + bus = ubox_dev->bus->number; + /* get the Node ID of the local register */ + pci_read_config_dword(ubox_dev, 0x40, &config); + nodeid = config; + /* get the Node ID mapping */ + pci_read_config_dword(ubox_dev, 0x54, &config); + /* + * every three bits in the Node ID mapping register maps + * to a particular node. + */ + for (i = 0; i < 8; i++) { + if (nodeid == ((config >> (3 * i)) & 0x7)) { + pcibus_to_physid[bus] = i; + break; + } + } + }; + return; +} +/* end of Sandy Bridge-EP uncore support */ + /* Sandy Bridge uncore support */ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, @@ -894,6 +1370,11 @@ static int __init uncore_pci_init(void) int ret; switch (boot_cpu_data.x86_model) { + case 45: /* Sandy Bridge-EP */ + pci_uncores = snbep_pci_uncores; + uncore_pci_driver = &snbep_uncore_pci_driver; + snbep_pci2phy_map_init(); + break; default: return 0; } @@ -1155,6 +1636,9 @@ static int __init uncore_cpu_init(void) case 42: /* Sandy Bridge */ msr_uncores = snb_msr_uncores; break; + case 45: /* Sandy Birdge-EP */ + msr_uncores = snbep_msr_uncores; + break; default: return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index aa01df87b8de..4d52db0d1dfe 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -65,6 +65,92 @@ #define NHM_UNC_PERFEVTSEL0 0x3c0 #define NHM_UNC_UNCORE_PMC0 0x3b0 +/* SNB-EP Box level control */ +#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) +#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1) +#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8) +#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16) +#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ + SNBEP_PMON_BOX_CTL_RST_CTRS | \ + SNBEP_PMON_BOX_CTL_FRZ_EN) +/* SNB-EP event control */ +#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff +#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00 +#define SNBEP_PMON_CTL_RST (1 << 17) +#define SNBEP_PMON_CTL_EDGE_DET (1 << 18) +#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) /* only for QPI */ +#define SNBEP_PMON_CTL_EN (1 << 22) +#define SNBEP_PMON_CTL_INVERT (1 << 23) +#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000 +#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_PMON_CTL_TRESH_MASK) + +/* SNB-EP Ubox event control */ +#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000 +#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PMON_CTL_UMASK_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_U_MSR_PMON_CTL_TRESH_MASK) + +/* SNB-EP PCU event control */ +#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000 +#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000 +#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30) +#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31) +#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_CTL_EV_SEL_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ + SNBEP_PMON_CTL_EDGE_DET | \ + SNBEP_PMON_CTL_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ + SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) + +/* SNB-EP pci control register */ +#define SNBEP_PCI_PMON_BOX_CTL 0xf4 +#define SNBEP_PCI_PMON_CTL0 0xd8 +/* SNB-EP pci counter register */ +#define SNBEP_PCI_PMON_CTR0 0xa0 + +/* SNB-EP home agent register */ +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40 +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44 +#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48 +/* SNB-EP memory controller register */ +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0 +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0 +/* SNB-EP QPI register */ +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228 +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238 +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c + +/* SNB-EP Ubox register */ +#define SNBEP_U_MSR_PMON_CTR0 0xc16 +#define SNBEP_U_MSR_PMON_CTL0 0xc10 + +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08 +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09 + +/* SNB-EP Cbo register */ +#define SNBEP_C0_MSR_PMON_CTR0 0xd16 +#define SNBEP_C0_MSR_PMON_CTL0 0xd10 +#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 +#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04 +#define SNBEP_CBO_MSR_OFFSET 0x20 + +/* SNB-EP PCU register */ +#define SNBEP_PCU_MSR_PMON_CTR0 0xc36 +#define SNBEP_PCU_MSR_PMON_CTL0 0xc30 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34 +#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24 +#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc +#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd struct intel_uncore_ops; struct intel_uncore_pmu; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ab741b0d0074..5f187026b812 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2755,6 +2755,17 @@ #define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27 #define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e #define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f +#define PCI_DEVICE_ID_INTEL_UNC_HA 0x3c46 +#define PCI_DEVICE_ID_INTEL_UNC_IMC0 0x3cb0 +#define PCI_DEVICE_ID_INTEL_UNC_IMC1 0x3cb1 +#define PCI_DEVICE_ID_INTEL_UNC_IMC2 0x3cb4 +#define PCI_DEVICE_ID_INTEL_UNC_IMC3 0x3cb5 +#define PCI_DEVICE_ID_INTEL_UNC_QPI0 0x3c41 +#define PCI_DEVICE_ID_INTEL_UNC_QPI1 0x3c42 +#define PCI_DEVICE_ID_INTEL_UNC_R2PCIE 0x3c43 +#define PCI_DEVICE_ID_INTEL_UNC_R3QPI0 0x3c44 +#define PCI_DEVICE_ID_INTEL_UNC_R3QPI1 0x3c45 +#define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX 0x3ce0 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 -- cgit v1.2.3 From 46010ab2607aed9484816a8f1679c2ec3c8e7dcf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 15 Jun 2012 14:31:38 +0800 Subject: perf/tool: Use data struct for arg passing in event parse function Moving all the bison arguments into the structure. In upcomming patches we are going to: - add more arguments - reuse the grammer for term parsing so it's more clear to pack/separate related arguments. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1339741902-8449-10-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 16 +++++++------ tools/perf/util/parse-events.h | 8 +++++-- tools/perf/util/parse-events.y | 52 ++++++++++++++++++++++++++++-------------- 3 files changed, 50 insertions(+), 26 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 05dbc8b3c767..c71b29ad26ec 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -26,7 +26,7 @@ struct event_symbol { #ifdef PARSER_DEBUG extern int parse_events_debug; #endif -int parse_events_parse(struct list_head *list, int *idx); +int parse_events_parse(void *data); #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x @@ -789,25 +789,27 @@ int parse_events_modifier(struct list_head *list, char *str) int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) { - LIST_HEAD(list); - LIST_HEAD(list_tmp); + struct parse_events_data__events data = { + .list = LIST_HEAD_INIT(data.list), + .idx = evlist->nr_entries, + }; YY_BUFFER_STATE buffer; - int ret, idx = evlist->nr_entries; + int ret; buffer = parse_events__scan_string(str); #ifdef PARSER_DEBUG parse_events_debug = 1; #endif - ret = parse_events_parse(&list, &idx); + ret = parse_events_parse(&data); parse_events__flush_buffer(buffer); parse_events__delete_buffer(buffer); parse_events_lex_destroy(); if (!ret) { - int entries = idx - evlist->nr_entries; - perf_evlist__splice_list_tail(evlist, &list, entries); + int entries = data.idx - evlist->nr_entries; + perf_evlist__splice_list_tail(evlist, &data.list, entries); return 0; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 8cac57ab4ee6..dc3c83a0ab8a 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -63,6 +63,11 @@ struct parse_events__term { struct list_head list; }; +struct parse_events_data__events { + struct list_head list; + int idx; +}; + int parse_events__is_hardcoded_term(struct parse_events__term *term); int parse_events__term_num(struct parse_events__term **_term, int type_term, char *config, long num); @@ -83,8 +88,7 @@ int parse_events_add_pmu(struct list_head **list, int *idx, char *pmu , struct list_head *head_config); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); -void parse_events_error(struct list_head *list_all, - int *idx, char const *msg); +void parse_events_error(void *data, char const *msg); int parse_events__test(void); void print_events(const char *event_glob); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 362cc59332ae..e533bf72ba9c 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -1,7 +1,6 @@ %name-prefix "parse_events_" -%parse-param {struct list_head *list_all} -%parse-param {int *idx} +%parse-param {void *_data} %{ @@ -64,18 +63,22 @@ events ',' event | event event: event_def PE_MODIFIER_EVENT { + struct parse_events_data__events *data = _data; + /* * Apply modifier on all events added by single event definition * (there could be more events added for multiple tracepoint * definitions via '*?'. */ ABORT_ON(parse_events_modifier($1, $2)); - parse_events_update_lists($1, list_all); + parse_events_update_lists($1, &data->list); } | event_def { - parse_events_update_lists($1, list_all); + struct parse_events_data__events *data = _data; + + parse_events_update_lists($1, &data->list); } event_def: event_pmu | @@ -89,9 +92,10 @@ event_def: event_pmu | event_pmu: PE_NAME '/' event_config '/' { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_pmu(&list, idx, $1, $3)); + ABORT_ON(parse_events_add_pmu(&list, &data->idx, $1, $3)); parse_events__free_terms($3); $$ = list; } @@ -99,91 +103,106 @@ PE_NAME '/' event_config '/' event_legacy_symbol: PE_VALUE_SYM '/' event_config '/' { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; int type = $1 >> 16; int config = $1 & 255; - ABORT_ON(parse_events_add_numeric(&list, idx, type, config, $3)); + ABORT_ON(parse_events_add_numeric(&list, &data->idx, + type, config, $3)); parse_events__free_terms($3); $$ = list; } | PE_VALUE_SYM sep_slash_dc { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; int type = $1 >> 16; int config = $1 & 255; - ABORT_ON(parse_events_add_numeric(&list, idx, type, config, NULL)); + ABORT_ON(parse_events_add_numeric(&list, &data->idx, + type, config, NULL)); $$ = list; } event_legacy_cache: PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_cache(&list, idx, $1, $3, $5)); + ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, $5)); $$ = list; } | PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_cache(&list, idx, $1, $3, NULL)); + ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, NULL)); $$ = list; } | PE_NAME_CACHE_TYPE { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_cache(&list, idx, $1, NULL, NULL)); + ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, NULL, NULL)); $$ = list; } event_legacy_mem: PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_breakpoint(&list, idx, (void *) $2, $4)); + ABORT_ON(parse_events_add_breakpoint(&list, &data->idx, + (void *) $2, $4)); $$ = list; } | PE_PREFIX_MEM PE_VALUE sep_dc { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_breakpoint(&list, idx, (void *) $2, NULL)); + ABORT_ON(parse_events_add_breakpoint(&list, &data->idx, + (void *) $2, NULL)); $$ = list; } event_legacy_tracepoint: PE_NAME ':' PE_NAME { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_tracepoint(&list, idx, $1, $3)); + ABORT_ON(parse_events_add_tracepoint(&list, &data->idx, $1, $3)); $$ = list; } event_legacy_numeric: PE_VALUE ':' PE_VALUE { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_numeric(&list, idx, $1, $3, NULL)); + ABORT_ON(parse_events_add_numeric(&list, &data->idx, $1, $3, NULL)); $$ = list; } event_legacy_raw: PE_RAW { + struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_numeric(&list, idx, PERF_TYPE_RAW, $1, NULL)); + ABORT_ON(parse_events_add_numeric(&list, &data->idx, + PERF_TYPE_RAW, $1, NULL)); $$ = list; } @@ -267,8 +286,7 @@ sep_slash_dc: '/' | ':' | %% -void parse_events_error(struct list_head *list_all __used, - int *idx __used, +void parse_events_error(void *data __used, char const *msg __used) { } -- cgit v1.2.3 From ac20de6fff445d6deb0c44c25946d198f79f2f00 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 15 Jun 2012 14:31:39 +0800 Subject: perf/tool: Make the event parser re-entrant Make the event parser reentrant by creating separate scanner for each parsing. The scanner is passed to the bison as and argument to the lexer. Signed-off-by: Zheng Yan [ Cleaned up the patch. ] Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1339741902-8449-11-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 35 +++++++++---- tools/perf/util/parse-events.h | 2 +- tools/perf/util/parse-events.l | 116 ++++++++++++++++++++++++----------------- tools/perf/util/parse-events.y | 9 ++-- 4 files changed, 98 insertions(+), 64 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c71b29ad26ec..ca8665e19c0d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -11,6 +11,7 @@ #include "cache.h" #include "header.h" #include "debugfs.h" +#include "parse-events-bison.h" #include "parse-events-flex.h" #include "pmu.h" @@ -26,7 +27,7 @@ struct event_symbol { #ifdef PARSER_DEBUG extern int parse_events_debug; #endif -int parse_events_parse(void *data); +int parse_events_parse(void *data, void *scanner); #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x @@ -787,26 +788,38 @@ int parse_events_modifier(struct list_head *list, char *str) return 0; } -int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) +static int parse_events__scanner(const char *str, void *data) { - struct parse_events_data__events data = { - .list = LIST_HEAD_INIT(data.list), - .idx = evlist->nr_entries, - }; YY_BUFFER_STATE buffer; + void *scanner; int ret; - buffer = parse_events__scan_string(str); + ret = parse_events_lex_init(&scanner); + if (ret) + return ret; + + buffer = parse_events__scan_string(str, scanner); #ifdef PARSER_DEBUG parse_events_debug = 1; #endif - ret = parse_events_parse(&data); + ret = parse_events_parse(data, scanner); + + parse_events__flush_buffer(buffer, scanner); + parse_events__delete_buffer(buffer, scanner); + parse_events_lex_destroy(scanner); + return ret; +} - parse_events__flush_buffer(buffer); - parse_events__delete_buffer(buffer); - parse_events_lex_destroy(); +int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) +{ + struct parse_events_data__events data = { + .list = LIST_HEAD_INIT(data.list), + .idx = evlist->nr_entries, + }; + int ret; + ret = parse_events__scanner(str, &data); if (!ret) { int entries = data.idx - evlist->nr_entries; perf_evlist__splice_list_tail(evlist, &data.list, entries); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index dc3c83a0ab8a..fa2b19b862e2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -88,7 +88,7 @@ int parse_events_add_pmu(struct list_head **list, int *idx, char *pmu , struct list_head *head_config); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); -void parse_events_error(void *data, char const *msg); +void parse_events_error(void *data, void *scanner, char const *msg); int parse_events__test(void); void print_events(const char *event_glob); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 618a8e788399..329794eea711 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -1,4 +1,6 @@ +%option reentrant +%option bison-bridge %option prefix="parse_events_" %option stack @@ -8,7 +10,10 @@ #include "parse-events-bison.h" #include "parse-events.h" -static int __value(char *str, int base, int token) +char *parse_events_get_text(yyscan_t yyscanner); +YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); + +static int __value(YYSTYPE *yylval, char *str, int base, int token) { long num; @@ -17,35 +22,48 @@ static int __value(char *str, int base, int token) if (errno) return PE_ERROR; - parse_events_lval.num = num; + yylval->num = num; return token; } -static int value(int base) +static int value(yyscan_t scanner, int base) { - return __value(parse_events_text, base, PE_VALUE); + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + return __value(yylval, text, base, PE_VALUE); } -static int raw(void) +static int raw(yyscan_t scanner) { - return __value(parse_events_text + 1, 16, PE_RAW); + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + return __value(yylval, text + 1, 16, PE_RAW); } -static int str(int token) +static int str(yyscan_t scanner, int token) { - parse_events_lval.str = strdup(parse_events_text); + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + yylval->str = strdup(text); return token; } -static int sym(int type, int config) +static int sym(yyscan_t scanner, int type, int config) { - parse_events_lval.num = (type << 16) + config; + YYSTYPE *yylval = parse_events_get_lval(scanner); + + yylval->num = (type << 16) + config; return PE_VALUE_SYM; } -static int term(int type) +static int term(yyscan_t scanner, int type) { - parse_events_lval.num = type; + YYSTYPE *yylval = parse_events_get_lval(scanner); + + yylval->num = type; return PE_TERM; } @@ -61,25 +79,25 @@ modifier_event [ukhpGH]{1,8} modifier_bp [rwx] %% -cpu-cycles|cycles { return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } -stalled-cycles-frontend|idle-cycles-frontend { return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } -stalled-cycles-backend|idle-cycles-backend { return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } -instructions { return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); } -cache-references { return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); } -cache-misses { return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); } -branch-instructions|branches { return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } -branch-misses { return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); } -bus-cycles { return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); } -ref-cycles { return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); } -cpu-clock { return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); } -task-clock { return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); } -page-faults|faults { return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); } -minor-faults { return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); } -major-faults { return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); } -context-switches|cs { return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); } -cpu-migrations|migrations { return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); } -alignment-faults { return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } -emulation-faults { return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } +cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } +stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } +stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } +instructions { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); } +cache-references { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); } +cache-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); } +branch-instructions|branches { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } +branch-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); } +bus-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); } +ref-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); } +cpu-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); } +task-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); } +page-faults|faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); } +minor-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); } +major-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); } +context-switches|cs { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); } +cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); } +alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } +emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } L1-dcache|l1-d|l1d|L1-data | L1-icache|l1-i|l1i|L1-instruction | @@ -87,14 +105,14 @@ LLC|L2 | dTLB|d-tlb|Data-TLB | iTLB|i-tlb|Instruction-TLB | branch|branches|bpu|btb|bpc | -node { return str(PE_NAME_CACHE_TYPE); } +node { return str(yyscanner, PE_NAME_CACHE_TYPE); } load|loads|read | store|stores|write | prefetch|prefetches | speculative-read|speculative-load | refs|Reference|ops|access | -misses|miss { return str(PE_NAME_CACHE_OP_RESULT); } +misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); } /* * These are event config hardcoded term names to be specified @@ -102,20 +120,20 @@ misses|miss { return str(PE_NAME_CACHE_OP_RESULT); } * so we can put them here directly. In case the we have a conflict * in future, this needs to go into '//' condition block. */ -config { return term(PARSE_EVENTS__TERM_TYPE_CONFIG); } -config1 { return term(PARSE_EVENTS__TERM_TYPE_CONFIG1); } -config2 { return term(PARSE_EVENTS__TERM_TYPE_CONFIG2); } -name { return term(PARSE_EVENTS__TERM_TYPE_NAME); } -period { return term(PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } -branch_type { return term(PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } +config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } +config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } +config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } +name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } +period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } +branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } mem: { BEGIN(mem); return PE_PREFIX_MEM; } -r{num_raw_hex} { return raw(); } -{num_dec} { return value(10); } -{num_hex} { return value(16); } +r{num_raw_hex} { return raw(yyscanner); } +{num_dec} { return value(yyscanner, 10); } +{num_hex} { return value(yyscanner, 16); } -{modifier_event} { return str(PE_MODIFIER_EVENT); } -{name} { return str(PE_NAME); } +{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } +{name} { return str(yyscanner, PE_NAME); } "/" { return '/'; } - { return '-'; } , { return ','; } @@ -123,17 +141,17 @@ r{num_raw_hex} { return raw(); } = { return '='; } { -{modifier_bp} { return str(PE_MODIFIER_BP); } +{modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); } : { return ':'; } -{num_dec} { return value(10); } -{num_hex} { return value(16); } +{num_dec} { return value(yyscanner, 10); } +{num_hex} { return value(yyscanner, 16); } /* * We need to separate 'mem:' scanner part, in order to get specific * modifier bits parsed out. Otherwise we would need to handle PE_NAME * and we'd need to parse it manually. During the escape from * state we need to put the escaping char back, so we dont miss it. */ -. { unput(*parse_events_text); BEGIN(INITIAL); } +. { unput(*yytext); BEGIN(INITIAL); } /* * We destroy the scanner after reaching EOF, * but anyway just to be sure get back to INIT state. @@ -143,7 +161,7 @@ r{num_raw_hex} { return raw(); } %% -int parse_events_wrap(void) +int parse_events_wrap(void *scanner __used) { return 1; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index e533bf72ba9c..2a93d5c8ccda 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -1,6 +1,8 @@ - +%pure-parser %name-prefix "parse_events_" %parse-param {void *_data} +%parse-param {void *scanner} +%lex-param {void* scanner} %{ @@ -11,8 +13,9 @@ #include "types.h" #include "util.h" #include "parse-events.h" +#include "parse-events-bison.h" -extern int parse_events_lex (void); +extern int parse_events_lex (YYSTYPE* lvalp, void* scanner); #define ABORT_ON(val) \ do { \ @@ -286,7 +289,7 @@ sep_slash_dc: '/' | ':' | %% -void parse_events_error(void *data __used, +void parse_events_error(void *data __used, void *scanner __used, char const *msg __used) { } -- cgit v1.2.3 From 90e2b22dee908c13df256140a0d6527e3e8ea3f4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 15 Jun 2012 14:31:40 +0800 Subject: perf/tool: Add support to reuse event grammar to parse out terms We want to reuse the event grammar for parsing aliased terms. The obvious reason is we dont need to add new code when there's already support for this in event grammar. Doing this by adding terms and event start entries into event parse grammar. The grammar forks on the begining based on the starting token, which is supplied via bison interface into the lexer. The lexer then returns the starting token as the first token, thus making the grammar switch accordingly. Currently 2 starting tokens/grammars are supported: PE_START_TERMS, PE_START_EVENTS The PE_START_TERMS related grammar uses 'event_config' part of the grammar for term parsing. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1339741902-8449-12-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 28 +++++++++++++++++++++++++--- tools/perf/util/parse-events.h | 5 +++++ tools/perf/util/parse-events.l | 13 +++++++++++++ tools/perf/util/parse-events.y | 12 ++++++++++++ 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ca8665e19c0d..d002170adb3f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -12,6 +12,7 @@ #include "header.h" #include "debugfs.h" #include "parse-events-bison.h" +#define YY_EXTRA_TYPE int #include "parse-events-flex.h" #include "pmu.h" @@ -788,13 +789,13 @@ int parse_events_modifier(struct list_head *list, char *str) return 0; } -static int parse_events__scanner(const char *str, void *data) +static int parse_events__scanner(const char *str, void *data, int start_token) { YY_BUFFER_STATE buffer; void *scanner; int ret; - ret = parse_events_lex_init(&scanner); + ret = parse_events_lex_init_extra(start_token, &scanner); if (ret) return ret; @@ -811,6 +812,27 @@ static int parse_events__scanner(const char *str, void *data) return ret; } +/* + * parse event config string, return a list of event terms. + */ +int parse_events_terms(struct list_head *terms, const char *str) +{ + struct parse_events_data__terms data = { + .terms = NULL, + }; + int ret; + + ret = parse_events__scanner(str, &data, PE_START_TERMS); + if (!ret) { + list_splice(data.terms, terms); + free(data.terms); + return 0; + } + + parse_events__free_terms(data.terms); + return ret; +} + int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) { struct parse_events_data__events data = { @@ -819,7 +841,7 @@ int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) }; int ret; - ret = parse_events__scanner(str, &data); + ret = parse_events__scanner(str, &data, PE_START_EVENTS); if (!ret) { int entries = data.idx - evlist->nr_entries; perf_evlist__splice_list_tail(evlist, &data.list, entries); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index fa2b19b862e2..9896edadbe62 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -33,6 +33,7 @@ extern int parse_events_option(const struct option *opt, const char *str, int unset); extern int parse_events(struct perf_evlist *evlist, const char *str, int unset); +extern int parse_events_terms(struct list_head *terms, const char *str); extern int parse_filter(const struct option *opt, const char *str, int unset); #define EVENTS_HELP_MAX (128*1024) @@ -68,6 +69,10 @@ struct parse_events_data__events { int idx; }; +struct parse_events_data__terms { + struct list_head *terms; +}; + int parse_events__is_hardcoded_term(struct parse_events__term *term); int parse_events__term_num(struct parse_events__term **_term, int type_term, char *config, long num); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 329794eea711..488362e14133 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -79,6 +79,19 @@ modifier_event [ukhpGH]{1,8} modifier_bp [rwx] %% + +%{ + { + int start_token; + + start_token = (int) parse_events_get_extra(yyscanner); + if (start_token) { + parse_events_set_extra(NULL, yyscanner); + return start_token; + } + } +%} + cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 2a93d5c8ccda..9525c455d27f 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -25,6 +25,7 @@ do { \ %} +%token PE_START_EVENTS PE_START_TERMS %token PE_VALUE PE_VALUE_SYM PE_RAW PE_TERM %token PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP @@ -60,6 +61,11 @@ do { \ } %% +start: +PE_START_EVENTS events +| +PE_START_TERMS terms + events: events ',' event | event @@ -209,6 +215,12 @@ PE_RAW $$ = list; } +terms: event_config +{ + struct parse_events_data__terms *data = _data; + data->terms = $1; +} + event_config: event_config ',' event_term { -- cgit v1.2.3 From a6146d5040cce560f700221158d77dd335eed332 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 15 Jun 2012 14:31:41 +0800 Subject: perf/tool: Add PMU event alias support Add support to specify alias term within the event description. The definition of pmu event alias is located at: ${sysfs_mount}/bus/event_source/devices/${pmu}/events/ Each file in the 'events' directory defines a event alias. Its contents are like: config=1,config1=2 Using pmu event alias, an event can be now specified like: uncore/CLOCKTICKS/ or uncore/event=CLOCKTICKS/ Signed-off-by: Zheng Yan [ Cleaned it up. ] Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1339741902-8449-13-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 10 +++ tools/perf/util/parse-events.h | 2 + tools/perf/util/pmu.c | 166 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/pmu.h | 11 ++- 4 files changed, 188 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d002170adb3f..3339424cc421 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -701,6 +701,9 @@ int parse_events_add_pmu(struct list_head **list, int *idx, memset(&attr, 0, sizeof(attr)); + if (perf_pmu__check_alias(pmu, head_config)) + return -EINVAL; + /* * Configure hardcoded terms first, no need to check * return value when called with fail == 0 ;) @@ -1143,6 +1146,13 @@ int parse_events__term_str(struct parse_events__term **term, config, str, 0); } +int parse_events__term_clone(struct parse_events__term **new, + struct parse_events__term *term) +{ + return new_term(new, term->type_val, term->type_term, term->config, + term->val.str, term->val.num); +} + void parse_events__free_terms(struct list_head *terms) { struct parse_events__term *term, *h; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 9896edadbe62..a2c71684f6a4 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -78,6 +78,8 @@ int parse_events__term_num(struct parse_events__term **_term, int type_term, char *config, long num); int parse_events__term_str(struct parse_events__term **_term, int type_term, char *config, char *str); +int parse_events__term_clone(struct parse_events__term **new, + struct parse_events__term *term); void parse_events__free_terms(struct list_head *terms); int parse_events_modifier(struct list_head *list, char *str); int parse_events_add_tracepoint(struct list_head **list, int *idx, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index a119a5371699..74d0948ec368 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -80,6 +80,114 @@ static int pmu_format(char *name, struct list_head *format) return 0; } +static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) +{ + struct perf_pmu__alias *alias; + char buf[256]; + int ret; + + ret = fread(buf, 1, sizeof(buf), file); + if (ret == 0) + return -EINVAL; + buf[ret] = 0; + + alias = malloc(sizeof(*alias)); + if (!alias) + return -ENOMEM; + + INIT_LIST_HEAD(&alias->terms); + ret = parse_events_terms(&alias->terms, buf); + if (ret) { + free(alias); + return ret; + } + + alias->name = strdup(name); + list_add_tail(&alias->list, list); + return 0; +} + +/* + * Process all the sysfs attributes located under the directory + * specified in 'dir' parameter. + */ +static int pmu_aliases_parse(char *dir, struct list_head *head) +{ + struct dirent *evt_ent; + DIR *event_dir; + int ret = 0; + + event_dir = opendir(dir); + if (!event_dir) + return -EINVAL; + + while (!ret && (evt_ent = readdir(event_dir))) { + char path[PATH_MAX]; + char *name = evt_ent->d_name; + FILE *file; + + if (!strcmp(name, ".") || !strcmp(name, "..")) + continue; + + snprintf(path, PATH_MAX, "%s/%s", dir, name); + + ret = -EINVAL; + file = fopen(path, "r"); + if (!file) + break; + ret = perf_pmu__new_alias(head, name, file); + fclose(file); + } + + closedir(event_dir); + return ret; +} + +/* + * Reading the pmu event aliases definition, which should be located at: + * /sys/bus/event_source/devices//events as sysfs group attributes. + */ +static int pmu_aliases(char *name, struct list_head *head) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs; + + sysfs = sysfs_find_mountpoint(); + if (!sysfs) + return -1; + + snprintf(path, PATH_MAX, + "%s/bus/event_source/devices/%s/events", sysfs, name); + + if (stat(path, &st) < 0) + return -1; + + if (pmu_aliases_parse(path, head)) + return -1; + + return 0; +} + +static int pmu_alias_terms(struct perf_pmu__alias *alias, + struct list_head *terms) +{ + struct parse_events__term *term, *clone; + LIST_HEAD(list); + int ret; + + list_for_each_entry(term, &alias->terms, list) { + ret = parse_events__term_clone(&clone, term); + if (ret) { + parse_events__free_terms(&list); + return ret; + } + list_add_tail(&clone->list, &list); + } + list_splice(&list, terms); + return 0; +} + /* * Reading/parsing the default pmu type value, which should be * located at: @@ -118,6 +226,7 @@ static struct perf_pmu *pmu_lookup(char *name) { struct perf_pmu *pmu; LIST_HEAD(format); + LIST_HEAD(aliases); __u32 type; /* @@ -135,8 +244,12 @@ static struct perf_pmu *pmu_lookup(char *name) if (!pmu) return NULL; + pmu_aliases(name, &aliases); + INIT_LIST_HEAD(&pmu->format); + INIT_LIST_HEAD(&pmu->aliases); list_splice(&format, &pmu->format); + list_splice(&aliases, &pmu->aliases); pmu->name = strdup(name); pmu->type = type; return pmu; @@ -279,6 +392,59 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, return pmu_config(&pmu->format, attr, head_terms); } +static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu, + struct parse_events__term *term) +{ + struct perf_pmu__alias *alias; + char *name; + + if (parse_events__is_hardcoded_term(term)) + return NULL; + + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (term->val.num != 1) + return NULL; + if (pmu_find_format(&pmu->format, term->config)) + return NULL; + name = term->config; + } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { + if (strcasecmp(term->config, "event")) + return NULL; + name = term->val.str; + } else { + return NULL; + } + + list_for_each_entry(alias, &pmu->aliases, list) { + if (!strcasecmp(alias->name, name)) + return alias; + } + return NULL; +} + +/* + * Find alias in the terms list and replace it with the terms + * defined for the alias + */ +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) +{ + struct parse_events__term *term, *h; + struct perf_pmu__alias *alias; + int ret; + + list_for_each_entry_safe(term, h, head_terms, list) { + alias = pmu_find_alias(pmu, term); + if (!alias) + continue; + ret = pmu_alias_terms(alias, &term->list); + if (ret) + return ret; + list_del(&term->list); + free(term); + } + return 0; +} + int perf_pmu__new_format(struct list_head *list, char *name, int config, unsigned long *bits) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 68c0db965e1f..535f2c5258ab 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -19,17 +19,26 @@ struct perf_pmu__format { struct list_head list; }; +struct perf_pmu__alias { + char *name; + struct list_head terms; + struct list_head list; +}; + struct perf_pmu { char *name; __u32 type; struct list_head format; + struct list_head aliases; struct list_head list; }; struct perf_pmu *perf_pmu__find(char *name); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct list_head *head_terms); - +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms); +struct list_head *perf_pmu__alias(struct perf_pmu *pmu, + struct list_head *head_terms); int perf_pmu_wrap(void); void perf_pmu_error(struct list_head *list, char *name, char const *msg); -- cgit v1.2.3 From 4429392e1484319df4209d41a98244c76d0caf56 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 15 Jun 2012 14:31:42 +0800 Subject: perf/tool: Add automated test for pure terms parsing Adding automated test for parsing terms out of the event grammar. Also slightly changing current event parsing test functions to follow up more generic namespace. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1339741902-8449-14-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events-test.c | 122 ++++++++++++++++++++++++++++++++++-- 1 file changed, 117 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 76b98e2a587d..af1039cdb853 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -430,6 +430,49 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist) return 0; } +static int test__checkterms_simple(struct list_head *terms) +{ + struct parse_events__term *term; + + /* config=10 */ + term = list_entry(terms->next, struct parse_events__term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 10); + TEST_ASSERT_VAL("wrong config", !term->config); + + /* config1 */ + term = list_entry(term->list.next, struct parse_events__term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 1); + TEST_ASSERT_VAL("wrong config", !term->config); + + /* config2=3 */ + term = list_entry(term->list.next, struct parse_events__term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 3); + TEST_ASSERT_VAL("wrong config", !term->config); + + /* umask=1*/ + term = list_entry(term->list.next, struct parse_events__term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_USER); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 1); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask")); + + return 0; +} + struct test__event_st { const char *name; __u32 type; @@ -559,7 +602,23 @@ static struct test__event_st test__events_pmu[] = { #define TEST__EVENTS_PMU_CNT (sizeof(test__events_pmu) / \ sizeof(struct test__event_st)) -static int test(struct test__event_st *e) +struct test__term { + const char *str; + __u32 type; + int (*check)(struct list_head *terms); +}; + +static struct test__term test__terms[] = { + [0] = { + .str = "config=10,config1,config2=3,umask=1", + .check = test__checkterms_simple, + }, +}; + +#define TEST__TERMS_CNT (sizeof(test__terms) / \ + sizeof(struct test__term)) + +static int test_event(struct test__event_st *e) { struct perf_evlist *evlist; int ret; @@ -590,7 +649,48 @@ static int test_events(struct test__event_st *events, unsigned cnt) struct test__event_st *e = &events[i]; pr_debug("running test %d '%s'\n", i, e->name); - ret = test(e); + ret = test_event(e); + if (ret) + break; + } + + return ret; +} + +static int test_term(struct test__term *t) +{ + struct list_head *terms; + int ret; + + terms = malloc(sizeof(*terms)); + if (!terms) + return -ENOMEM; + + INIT_LIST_HEAD(terms); + + ret = parse_events_terms(terms, t->str); + if (ret) { + pr_debug("failed to parse terms '%s', err %d\n", + t->str , ret); + return ret; + } + + ret = t->check(terms); + parse_events__free_terms(terms); + + return ret; +} + +static int test_terms(struct test__term *terms, unsigned cnt) +{ + int ret = 0; + unsigned i; + + for (i = 0; i < cnt; i++) { + struct test__term *t = &terms[i]; + + pr_debug("running test %d '%s'\n", i, t->str); + ret = test_term(t); if (ret) break; } @@ -617,9 +717,21 @@ int parse_events__test(void) { int ret; - ret = test_events(test__events, TEST__EVENTS_CNT); - if (!ret && test_pmu()) - ret = test_events(test__events_pmu, TEST__EVENTS_PMU_CNT); + do { + ret = test_events(test__events, TEST__EVENTS_CNT); + if (ret) + break; + + if (test_pmu()) { + ret = test_events(test__events_pmu, + TEST__EVENTS_PMU_CNT); + if (ret) + break; + } + + ret = test_terms(test__terms, TEST__TERMS_CNT); + + } while (0); return ret; } -- cgit v1.2.3 From 2992c542fcd40777ed253f57362c65711fb8acaf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 16 Jun 2012 14:45:49 +0200 Subject: perf/x86: Lowercase uncore PMU event names Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-ucnds8gkve4x3s4biuukyph3@git.kernel.org [ Trivial build fix ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 51 +++++++++------------------ 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index d34f68bf990b..28a8413ca199 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -179,22 +179,17 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = { }; static struct uncore_event_desc snbep_uncore_imc_events[] = { - INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "config=0xffff"), - /* read */ - INTEL_UNCORE_EVENT_DESC(CAS_COUNT_RD, "event=0x4,umask=0x3"), - /* write */ - INTEL_UNCORE_EVENT_DESC(CAS_COUNT_WR, "event=0x4,umask=0xc"), + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0xff"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), { /* end: all zeroes */ }, }; static struct uncore_event_desc snbep_uncore_qpi_events[] = { - INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "event=0x14"), - /* outgoing data+nondata flits */ - INTEL_UNCORE_EVENT_DESC(TxL_FLITS_ACTIVE, "event=0x0,umask=0x6"), - /* DRS data received */ - INTEL_UNCORE_EVENT_DESC(DRS_DATA, "event=0x2,umask=0x8"), - /* NCB data received */ - INTEL_UNCORE_EVENT_DESC(NCB_DATA, "event=0x3,umask=0x4"), + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), + INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), + INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x02,umask=0x08"), + INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x03,umask=0x04"), { /* end: all zeroes */ }, }; @@ -621,29 +616,15 @@ static struct attribute_group nhm_uncore_format_group = { }; static struct uncore_event_desc nhm_uncore_events[] = { - INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "config=0xffff"), - /* full cache line writes to DRAM */ - INTEL_UNCORE_EVENT_DESC(QMC_WRITES_FULL_ANY, "event=0x2f,umask=0xf"), - /* Quickpath Memory Controller normal priority read requests */ - INTEL_UNCORE_EVENT_DESC(QMC_NORMAL_READS_ANY, "event=0x2c,umask=0xf"), - /* Quickpath Home Logic read requests from the IOH */ - INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_READS, - "event=0x20,umask=0x1"), - /* Quickpath Home Logic write requests from the IOH */ - INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_WRITES, - "event=0x20,umask=0x2"), - /* Quickpath Home Logic read requests from a remote socket */ - INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_READS, - "event=0x20,umask=0x4"), - /* Quickpath Home Logic write requests from a remote socket */ - INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_WRITES, - "event=0x20,umask=0x8"), - /* Quickpath Home Logic read requests from the local socket */ - INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_READS, - "event=0x20,umask=0x10"), - /* Quickpath Home Logic write requests from the local socket */ - INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_WRITES, - "event=0x20,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0xff"), + INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"), + INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"), + INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"), + INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"), + INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"), + INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"), { /* end: all zeroes */ }, }; -- cgit v1.2.3 From 409a8be61560c5875816da6dcb0c575d78145a5c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 30 May 2012 10:33:24 -0300 Subject: perf tools: Add sort by src line/number Using addr2line for now, requires debuginfo, needs more work to support detached debuginfo, aka foo-debuginfo packages. Example: [root@sandy ~]# perf record -a sleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.555 MB perf.data (~24236 samples) ] [root@sandy ~]# perf report -s dso,srcline 2>&1 | grep -v ^# | head -5 22.41% [kernel.kallsyms] /home/git/linux/drivers/idle/intel_idle.c:280 4.79% [kernel.kallsyms] /home/git/linux/drivers/cpuidle/cpuidle.c:148 4.78% [kernel.kallsyms] /home/git/linux/arch/x86/include/asm/atomic64_64.h:121 4.49% [kernel.kallsyms] /home/git/linux/kernel/sched/core.c:1690 4.30% [kernel.kallsyms] /home/git/linux/include/linux/seqlock.h:90 [root@sandy ~]# [root@sandy ~]# perf top -U -s dso,symbol,srcline Samples: 1K of event 'cycles', Event count (approx.): 589617389 18.66% [kernel] [k] copy_user_generic_unrolled /home/git/linux/arch/x86/lib/copy_user_64.S:143 7.83% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:39 6.59% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:38 3.66% [kernel] [k] page_fault /home/git/linux/arch/x86/kernel/entry_64.S:1379 3.25% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:40 3.12% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:37 2.74% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:36 2.39% [kernel] [k] clear_page /home/git/linux/arch/x86/lib/clear_page_64.S:43 2.12% [kernel] [k] ioread32 /home/git/linux/lib/iomap.c:90 1.51% [kernel] [k] copy_user_generic_unrolled /home/git/linux/arch/x86/lib/copy_user_64.S:144 1.19% [kernel] [k] copy_user_generic_unrolled /home/git/linux/arch/x86/lib/copy_user_64.S:154 Suggested-by: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-pdmqbng9twz06jzkbgtuwbp8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/Documentation/perf-top.txt | 2 +- tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 49 ++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 2 ++ 5 files changed, 54 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 2d89f02719b5..495210a612c4 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -57,7 +57,7 @@ OPTIONS -s:: --sort=:: - Sort by key(s): pid, comm, dso, symbol, parent. + Sort by key(s): pid, comm, dso, symbol, parent, srcline. -p:: --parent=:: diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 4a5680cb242e..5b80d84d6b4a 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -112,7 +112,7 @@ Default is to monitor all CPUS. -s:: --sort:: - Sort by key(s): pid, comm, dso, symbol, parent + Sort by key(s): pid, comm, dso, symbol, parent, srcline. -n:: --show-nr-samples:: diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 34bb556d6219..0b096c27a419 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -47,6 +47,7 @@ enum hist_column { HISTC_SYMBOL_TO, HISTC_DSO_FROM, HISTC_DSO_TO, + HISTC_SRCLINE, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a27237430c5f..0f5a0a496bc4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -241,6 +241,54 @@ struct sort_entry sort_sym = { .se_width_idx = HISTC_SYMBOL, }; +/* --sort srcline */ + +static int64_t +sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return (int64_t)(right->ip - left->ip); +} + +static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + FILE *fp; + char cmd[PATH_MAX + 2], *path = self->srcline, *nl; + size_t line_len; + + if (path != NULL) + goto out_path; + + snprintf(cmd, sizeof(cmd), "addr2line -e %s %016" PRIx64, + self->ms.map->dso->long_name, self->ip); + fp = popen(cmd, "r"); + if (!fp) + goto out_ip; + + if (getline(&path, &line_len, fp) < 0 || !line_len) + goto out_ip; + fclose(fp); + self->srcline = strdup(path); + if (self->srcline == NULL) + goto out_ip; + + nl = strchr(self->srcline, '\n'); + if (nl != NULL) + *nl = '\0'; + path = self->srcline; +out_path: + return repsep_snprintf(bf, size, "%s", path); +out_ip: + return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip); +} + +struct sort_entry sort_srcline = { + .se_header = "Source:Line", + .se_cmp = sort__srcline_cmp, + .se_snprintf = hist_entry__srcline_snprintf, + .se_width_idx = HISTC_SRCLINE, +}; + /* --sort parent */ static int64_t @@ -439,6 +487,7 @@ static struct sort_dimension sort_dimensions[] = { DIM(SORT_PARENT, "parent", sort_parent), DIM(SORT_CPU, "cpu", sort_cpu), DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), + DIM(SORT_SRCLINE, "srcline", sort_srcline), }; int sort_dimension__add(const char *tok) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 472aa5a63a58..e724b26acd51 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -71,6 +71,7 @@ struct hist_entry { char level; bool used; u8 filtered; + char *srcline; struct symbol *parent; union { unsigned long position; @@ -93,6 +94,7 @@ enum sort_type { SORT_SYM_FROM, SORT_SYM_TO, SORT_MISPREDICT, + SORT_SRCLINE, }; /* -- cgit v1.2.3 From ba47a142d9f9b84e0464a11b7a067e5ad95c5d4b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 29 May 2012 13:22:58 +0900 Subject: perf ui: Introduce struct perf_error_ops The struct perf_error_ops is for flexible error logging. We can register appropriate functions based on front-end. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338265382-6872-4-git-send-email-namhyung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 3 + tools/perf/ui/gtk/util.c | 20 ++++ tools/perf/ui/tui/setup.c | 6 + tools/perf/ui/tui/util.c | 243 ++++++++++++++++++++++++++++++++++++++++ tools/perf/ui/util.c | 277 ++++++++++------------------------------------ tools/perf/ui/util.h | 9 +- tools/perf/util/debug.c | 2 +- tools/perf/util/debug.h | 23 +++- 8 files changed, 357 insertions(+), 226 deletions(-) create mode 100644 tools/perf/ui/gtk/util.c create mode 100644 tools/perf/ui/tui/util.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0eee64cfe9a0..c0ee917ae8ab 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -503,6 +503,7 @@ else LIB_OBJS += $(OUTPUT)ui/progress.o LIB_OBJS += $(OUTPUT)ui/util.o LIB_OBJS += $(OUTPUT)ui/tui/setup.o + LIB_OBJS += $(OUTPUT)ui/tui/util.o LIB_H += ui/browser.h LIB_H += ui/browsers/map.h LIB_H += ui/helpline.h @@ -526,9 +527,11 @@ else EXTLIBS += $(shell pkg-config --libs gtk+-2.0) LIB_OBJS += $(OUTPUT)ui/gtk/browser.o LIB_OBJS += $(OUTPUT)ui/gtk/setup.o + LIB_OBJS += $(OUTPUT)ui/gtk/util.o # Make sure that it'd be included only once. ifneq ($(findstring -DNO_NEWT_SUPPORT,$(BASIC_CFLAGS)),) LIB_OBJS += $(OUTPUT)ui/setup.o + LIB_OBJS += $(OUTPUT)ui/util.o endif endif endif diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c new file mode 100644 index 000000000000..a727fe394e91 --- /dev/null +++ b/tools/perf/ui/gtk/util.c @@ -0,0 +1,20 @@ +#include "../util.h" +#include "../../util/debug.h" +#include "gtk.h" + + +/* + * FIXME: Functions below should be implemented properly. + * For now, just add stubs for NO_NEWT=1 build. + */ +#ifdef NO_NEWT_SUPPORT +int ui_helpline__show_help(const char *format __used, va_list ap __used) +{ + return 0; +} + +void ui_progress__update(u64 curr __used, u64 total __used, + const char *title __used) +{ +} +#endif diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index d33e943ac434..e813c1d17346 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -15,6 +15,8 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; static volatile int ui__need_resize; +extern struct perf_error_ops perf_tui_eops; + void ui__refresh_dimensions(bool force) { if (force || ui__need_resize) { @@ -122,6 +124,8 @@ int ui__init(void) signal(SIGINT, ui__signal); signal(SIGQUIT, ui__signal); signal(SIGTERM, ui__signal); + + perf_error__register(&perf_tui_eops); out: return err; } @@ -137,4 +141,6 @@ void ui__exit(bool wait_for_ok) SLsmg_refresh(); SLsmg_reset_smg(); SLang_reset_tty(); + + perf_error__unregister(&perf_tui_eops); } diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c new file mode 100644 index 000000000000..092902e30cee --- /dev/null +++ b/tools/perf/ui/tui/util.c @@ -0,0 +1,243 @@ +#include "../../util/util.h" +#include +#include +#include +#include + +#include "../../util/cache.h" +#include "../../util/debug.h" +#include "../browser.h" +#include "../keysyms.h" +#include "../helpline.h" +#include "../ui.h" +#include "../util.h" +#include "../libslang.h" + +static void ui_browser__argv_write(struct ui_browser *browser, + void *entry, int row) +{ + char **arg = entry; + bool current_entry = ui_browser__is_current_entry(browser, row); + + ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : + HE_COLORSET_NORMAL); + slsmg_write_nstring(*arg, browser->width); +} + +static int popup_menu__run(struct ui_browser *menu) +{ + int key; + + if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0) + return -1; + + while (1) { + key = ui_browser__run(menu, 0); + + switch (key) { + case K_RIGHT: + case K_ENTER: + key = menu->index; + break; + case K_LEFT: + case K_ESC: + case 'q': + case CTRL('c'): + key = -1; + break; + default: + continue; + } + + break; + } + + ui_browser__hide(menu); + return key; +} + +int ui__popup_menu(int argc, char * const argv[]) +{ + struct ui_browser menu = { + .entries = (void *)argv, + .refresh = ui_browser__argv_refresh, + .seek = ui_browser__argv_seek, + .write = ui_browser__argv_write, + .nr_entries = argc, + }; + + return popup_menu__run(&menu); +} + +int ui_browser__input_window(const char *title, const char *text, char *input, + const char *exit_msg, int delay_secs) +{ + int x, y, len, key; + int max_len = 60, nr_lines = 0; + static char buf[50]; + const char *t; + + t = text; + while (1) { + const char *sep = strchr(t, '\n'); + + if (sep == NULL) + sep = strchr(t, '\0'); + len = sep - t; + if (max_len < len) + max_len = len; + ++nr_lines; + if (*sep == '\0') + break; + t = sep + 1; + } + + max_len += 2; + nr_lines += 8; + y = SLtt_Screen_Rows / 2 - nr_lines / 2; + x = SLtt_Screen_Cols / 2 - max_len / 2; + + SLsmg_set_color(0); + SLsmg_draw_box(y, x++, nr_lines, max_len); + if (title) { + SLsmg_gotorc(y, x + 1); + SLsmg_write_string((char *)title); + } + SLsmg_gotorc(++y, x); + nr_lines -= 7; + max_len -= 2; + SLsmg_write_wrapped_string((unsigned char *)text, y, x, + nr_lines, max_len, 1); + y += nr_lines; + len = 5; + while (len--) { + SLsmg_gotorc(y + len - 1, x); + SLsmg_write_nstring((char *)" ", max_len); + } + SLsmg_draw_box(y++, x + 1, 3, max_len - 2); + + SLsmg_gotorc(y + 3, x); + SLsmg_write_nstring((char *)exit_msg, max_len); + SLsmg_refresh(); + + x += 2; + len = 0; + key = ui__getch(delay_secs); + while (key != K_TIMER && key != K_ENTER && key != K_ESC) { + if (key == K_BKSPC) { + if (len == 0) + goto next_key; + SLsmg_gotorc(y, x + --len); + SLsmg_write_char(' '); + } else { + buf[len] = key; + SLsmg_gotorc(y, x + len++); + SLsmg_write_char(key); + } + SLsmg_refresh(); + + /* XXX more graceful overflow handling needed */ + if (len == sizeof(buf) - 1) { + ui_helpline__push("maximum size of symbol name reached!"); + key = K_ENTER; + break; + } +next_key: + key = ui__getch(delay_secs); + } + + buf[len] = '\0'; + strncpy(input, buf, len+1); + return key; +} + +int ui__question_window(const char *title, const char *text, + const char *exit_msg, int delay_secs) +{ + int x, y; + int max_len = 0, nr_lines = 0; + const char *t; + + t = text; + while (1) { + const char *sep = strchr(t, '\n'); + int len; + + if (sep == NULL) + sep = strchr(t, '\0'); + len = sep - t; + if (max_len < len) + max_len = len; + ++nr_lines; + if (*sep == '\0') + break; + t = sep + 1; + } + + max_len += 2; + nr_lines += 4; + y = SLtt_Screen_Rows / 2 - nr_lines / 2, + x = SLtt_Screen_Cols / 2 - max_len / 2; + + SLsmg_set_color(0); + SLsmg_draw_box(y, x++, nr_lines, max_len); + if (title) { + SLsmg_gotorc(y, x + 1); + SLsmg_write_string((char *)title); + } + SLsmg_gotorc(++y, x); + nr_lines -= 2; + max_len -= 2; + SLsmg_write_wrapped_string((unsigned char *)text, y, x, + nr_lines, max_len, 1); + SLsmg_gotorc(y + nr_lines - 2, x); + SLsmg_write_nstring((char *)" ", max_len); + SLsmg_gotorc(y + nr_lines - 1, x); + SLsmg_write_nstring((char *)exit_msg, max_len); + SLsmg_refresh(); + return ui__getch(delay_secs); +} + +int ui__help_window(const char *text) +{ + return ui__question_window("Help", text, "Press any key...", 0); +} + +int ui__dialog_yesno(const char *msg) +{ + return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0); +} + +static int __ui__warning(const char *title, const char *format, va_list args) +{ + char *s; + + if (vasprintf(&s, format, args) > 0) { + int key; + + pthread_mutex_lock(&ui__lock); + key = ui__question_window(title, s, "Press any key...", 0); + pthread_mutex_unlock(&ui__lock); + free(s); + return key; + } + + fprintf(stderr, "%s\n", title); + vfprintf(stderr, format, args); + return K_ESC; +} + +static int perf_tui__error(const char *format, va_list args) +{ + return __ui__warning("Error:", format, args); +} + +static int perf_tui__warning(const char *format, va_list args) +{ + return __ui__warning("Warning:", format, args); +} + +struct perf_error_ops perf_tui_eops = { + .error = perf_tui__error, + .warning = perf_tui__warning, +}; diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c index ad4374a16bb0..4f989774c8c6 100644 --- a/tools/perf/ui/util.c +++ b/tools/perf/ui/util.c @@ -1,250 +1,85 @@ -#include "../util.h" -#include -#include -#include -#include - -#include "../cache.h" -#include "../debug.h" -#include "browser.h" -#include "keysyms.h" -#include "helpline.h" -#include "ui.h" #include "util.h" -#include "libslang.h" - -static void ui_browser__argv_write(struct ui_browser *browser, - void *entry, int row) -{ - char **arg = entry; - bool current_entry = ui_browser__is_current_entry(browser, row); - - ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : - HE_COLORSET_NORMAL); - slsmg_write_nstring(*arg, browser->width); -} - -static int popup_menu__run(struct ui_browser *menu) -{ - int key; - - if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0) - return -1; +#include "../debug.h" - while (1) { - key = ui_browser__run(menu, 0); - - switch (key) { - case K_RIGHT: - case K_ENTER: - key = menu->index; - break; - case K_LEFT: - case K_ESC: - case 'q': - case CTRL('c'): - key = -1; - break; - default: - continue; - } - - break; - } - - ui_browser__hide(menu); - return key; -} -int ui__popup_menu(int argc, char * const argv[]) +/* + * Default error logging functions + */ +static int perf_stdio__error(const char *format, va_list args) { - struct ui_browser menu = { - .entries = (void *)argv, - .refresh = ui_browser__argv_refresh, - .seek = ui_browser__argv_seek, - .write = ui_browser__argv_write, - .nr_entries = argc, - }; - - return popup_menu__run(&menu); + fprintf(stderr, "Error:\n"); + vfprintf(stderr, format, args); + return 0; } -int ui_browser__input_window(const char *title, const char *text, char *input, - const char *exit_msg, int delay_secs) +static int perf_stdio__warning(const char *format, va_list args) { - int x, y, len, key; - int max_len = 60, nr_lines = 0; - static char buf[50]; - const char *t; - - t = text; - while (1) { - const char *sep = strchr(t, '\n'); - - if (sep == NULL) - sep = strchr(t, '\0'); - len = sep - t; - if (max_len < len) - max_len = len; - ++nr_lines; - if (*sep == '\0') - break; - t = sep + 1; - } - - max_len += 2; - nr_lines += 8; - y = SLtt_Screen_Rows / 2 - nr_lines / 2; - x = SLtt_Screen_Cols / 2 - max_len / 2; - - SLsmg_set_color(0); - SLsmg_draw_box(y, x++, nr_lines, max_len); - if (title) { - SLsmg_gotorc(y, x + 1); - SLsmg_write_string((char *)title); - } - SLsmg_gotorc(++y, x); - nr_lines -= 7; - max_len -= 2; - SLsmg_write_wrapped_string((unsigned char *)text, y, x, - nr_lines, max_len, 1); - y += nr_lines; - len = 5; - while (len--) { - SLsmg_gotorc(y + len - 1, x); - SLsmg_write_nstring((char *)" ", max_len); - } - SLsmg_draw_box(y++, x + 1, 3, max_len - 2); - - SLsmg_gotorc(y + 3, x); - SLsmg_write_nstring((char *)exit_msg, max_len); - SLsmg_refresh(); - - x += 2; - len = 0; - key = ui__getch(delay_secs); - while (key != K_TIMER && key != K_ENTER && key != K_ESC) { - if (key == K_BKSPC) { - if (len == 0) - goto next_key; - SLsmg_gotorc(y, x + --len); - SLsmg_write_char(' '); - } else { - buf[len] = key; - SLsmg_gotorc(y, x + len++); - SLsmg_write_char(key); - } - SLsmg_refresh(); - - /* XXX more graceful overflow handling needed */ - if (len == sizeof(buf) - 1) { - ui_helpline__push("maximum size of symbol name reached!"); - key = K_ENTER; - break; - } -next_key: - key = ui__getch(delay_secs); - } - - buf[len] = '\0'; - strncpy(input, buf, len+1); - return key; + fprintf(stderr, "Warning:\n"); + vfprintf(stderr, format, args); + return 0; } -int ui__question_window(const char *title, const char *text, - const char *exit_msg, int delay_secs) +static struct perf_error_ops default_eops = { - int x, y; - int max_len = 0, nr_lines = 0; - const char *t; - - t = text; - while (1) { - const char *sep = strchr(t, '\n'); - int len; - - if (sep == NULL) - sep = strchr(t, '\0'); - len = sep - t; - if (max_len < len) - max_len = len; - ++nr_lines; - if (*sep == '\0') - break; - t = sep + 1; - } - - max_len += 2; - nr_lines += 4; - y = SLtt_Screen_Rows / 2 - nr_lines / 2, - x = SLtt_Screen_Cols / 2 - max_len / 2; - - SLsmg_set_color(0); - SLsmg_draw_box(y, x++, nr_lines, max_len); - if (title) { - SLsmg_gotorc(y, x + 1); - SLsmg_write_string((char *)title); - } - SLsmg_gotorc(++y, x); - nr_lines -= 2; - max_len -= 2; - SLsmg_write_wrapped_string((unsigned char *)text, y, x, - nr_lines, max_len, 1); - SLsmg_gotorc(y + nr_lines - 2, x); - SLsmg_write_nstring((char *)" ", max_len); - SLsmg_gotorc(y + nr_lines - 1, x); - SLsmg_write_nstring((char *)exit_msg, max_len); - SLsmg_refresh(); - return ui__getch(delay_secs); -} + .error = perf_stdio__error, + .warning = perf_stdio__warning, +}; -int ui__help_window(const char *text) -{ - return ui__question_window("Help", text, "Press any key...", 0); -} +static struct perf_error_ops *perf_eops = &default_eops; -int ui__dialog_yesno(const char *msg) -{ - return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0); -} -int __ui__warning(const char *title, const char *format, va_list args) +int ui__error(const char *format, ...) { - char *s; - - if (use_browser > 0 && vasprintf(&s, format, args) > 0) { - int key; + int ret; + va_list args; - pthread_mutex_lock(&ui__lock); - key = ui__question_window(title, s, "Press any key...", 0); - pthread_mutex_unlock(&ui__lock); - free(s); - return key; - } + va_start(args, format); + ret = perf_eops->error(format, args); + va_end(args); - fprintf(stderr, "%s:\n", title); - vfprintf(stderr, format, args); - return K_ESC; + return ret; } int ui__warning(const char *format, ...) { - int key; + int ret; va_list args; va_start(args, format); - key = __ui__warning("Warning", format, args); + ret = perf_eops->warning(format, args); va_end(args); - return key; + + return ret; } -int ui__error(const char *format, ...) + +/** + * perf_error__register - Register error logging functions + * @eops: The pointer to error logging function struct + * + * Register UI-specific error logging functions. Before calling this, + * other logging functions should be unregistered, if any. + */ +int perf_error__register(struct perf_error_ops *eops) { - int key; - va_list args; + if (perf_eops != &default_eops) + return -1; - va_start(args, format); - key = __ui__warning("Error", format, args); - va_end(args); - return key; + perf_eops = eops; + return 0; +} + +/** + * perf_error__unregister - Unregister error logging functions + * @eops: The pointer to error logging function struct + * + * Unregister already registered error logging functions. + */ +int perf_error__unregister(struct perf_error_ops *eops) +{ + if (perf_eops != eops) + return -1; + + perf_eops = &default_eops; + return 0; } diff --git a/tools/perf/ui/util.h b/tools/perf/ui/util.h index 2d1738bd71c8..361f08c52d37 100644 --- a/tools/perf/ui/util.h +++ b/tools/perf/ui/util.h @@ -9,6 +9,13 @@ int ui__help_window(const char *text); int ui__dialog_yesno(const char *msg); int ui__question_window(const char *title, const char *text, const char *exit_msg, int delay_secs); -int __ui__warning(const char *title, const char *format, va_list args); + +struct perf_error_ops { + int (*error)(const char *format, va_list args); + int (*warning)(const char *format, va_list args); +}; + +int perf_error__register(struct perf_error_ops *eops); +int perf_error__unregister(struct perf_error_ops *eops); #endif /* _PERF_UI_UTIL_H_ */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index efb1fce259a4..4dfe0bb3c322 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -47,7 +47,7 @@ int dump_printf(const char *fmt, ...) return ret; } -#ifdef NO_NEWT_SUPPORT +#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT) int ui__warning(const char *format, ...) { va_list args; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 6bebe7f0a20c..015c91dbc096 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -12,8 +12,9 @@ int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); void trace_event(union perf_event *event); struct ui_progress; +struct perf_error_ops; -#ifdef NO_NEWT_SUPPORT +#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT) static inline int ui_helpline__show_help(const char *format __used, va_list ap __used) { return 0; @@ -23,12 +24,28 @@ static inline void ui_progress__update(u64 curr __used, u64 total __used, const char *title __used) {} #define ui__error(format, arg...) ui__warning(format, ##arg) -#else + +static inline int +perf_error__register(struct perf_error_ops *eops __used) +{ + return 0; +} + +static inline int +perf_error__unregister(struct perf_error_ops *eops __used) +{ + return 0; +} + +#else /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */ + extern char ui_helpline__last_msg[]; int ui_helpline__show_help(const char *format, va_list ap); #include "../ui/progress.h" int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); -#endif +#include "../ui/util.h" + +#endif /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */ int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); int ui__error_paranoid(void); -- cgit v1.2.3 From 42ab68a35ffee04700648ec42c9507145a66837d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 29 May 2012 13:22:59 +0900 Subject: perf ui/gtk: Introduce struct perf_gtk_context The struct perf_gtk_context is for tracking current state of GTK window and/or other things. This is a preparation of next changes. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338265382-6872-5-git-send-email-namhyung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/browser.c | 8 +++++++- tools/perf/ui/gtk/gtk.h | 17 +++++++++++++++++ tools/perf/ui/gtk/util.c | 23 +++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index 0656c381a89c..33ab1aee3472 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -11,8 +11,8 @@ static void perf_gtk__signal(int sig) { + perf_gtk__exit(false); psignal(sig, "perf"); - gtk_main_quit(); } static void perf_gtk__resize_window(GtkWidget *window) @@ -143,6 +143,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, g_signal_connect(window, "delete_event", gtk_main_quit, NULL); + pgctx = perf_gtk__activate_context(window); + if (!pgctx) + return -1; + notebook = gtk_notebook_new(); list_for_each_entry(pos, &evlist->entries, node) { @@ -174,5 +178,7 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, gtk_main(); + perf_gtk__deactivate_context(&pgctx); + return 0; } diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 75177ee04032..34fbca6d48a2 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -1,8 +1,25 @@ #ifndef _PERF_GTK_H_ #define _PERF_GTK_H_ 1 +#include + #pragma GCC diagnostic ignored "-Wstrict-prototypes" #include #pragma GCC diagnostic error "-Wstrict-prototypes" + +struct perf_gtk_context { + GtkWidget *main_window; +}; + +extern struct perf_gtk_context *pgctx; + +static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx) +{ + return ctx && ctx->main_window; +} + +struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window); +int perf_gtk__deactivate_context(struct perf_gtk_context **ctx); + #endif /* _PERF_GTK_H_ */ diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index a727fe394e91..6fe13fdc513e 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -3,6 +3,29 @@ #include "gtk.h" +struct perf_gtk_context *pgctx; + +struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window) +{ + struct perf_gtk_context *ctx; + + ctx = malloc(sizeof(*pgctx)); + if (ctx) + ctx->main_window = window; + + return ctx; +} + +int perf_gtk__deactivate_context(struct perf_gtk_context **ctx) +{ + if (!perf_gtk__is_active_context(*ctx)) + return -1; + + free(*ctx); + *ctx = NULL; + return 0; +} + /* * FIXME: Functions below should be implemented properly. * For now, just add stubs for NO_NEWT=1 build. -- cgit v1.2.3 From b4418c6848dcd56cc90625dcfaef7cb019492233 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 29 May 2012 13:23:00 +0900 Subject: perf ui/gtk: Add GTK statusbar widget to browser window Add statusbar widget to display non-critical messages at the bottom of the window. This can be used for showing a status change, warning or help message. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338265382-6872-6-git-send-email-namhyung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/browser.c | 26 +++++++++++++++++++++++++- tools/perf/ui/gtk/gtk.h | 2 ++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index 33ab1aee3472..ece360db8658 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -122,13 +122,30 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) gtk_container_add(GTK_CONTAINER(window), view); } +static GtkWidget *perf_gtk__setup_statusbar(void) +{ + GtkWidget *stbar; + unsigned ctxid; + + stbar = gtk_statusbar_new(); + + ctxid = gtk_statusbar_get_context_id(GTK_STATUSBAR(stbar), + "perf report"); + pgctx->statbar = stbar; + pgctx->statbar_ctx_id = ctxid; + + return stbar; +} + int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help __used, void (*timer) (void *arg)__used, void *arg __used, int delay_secs __used) { struct perf_evsel *pos; + GtkWidget *vbox; GtkWidget *notebook; + GtkWidget *statbar; GtkWidget *window; signal(SIGSEGV, perf_gtk__signal); @@ -147,6 +164,8 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, if (!pgctx) return -1; + vbox = gtk_vbox_new(FALSE, 0); + notebook = gtk_notebook_new(); list_for_each_entry(pos, &evlist->entries, node) { @@ -168,7 +187,12 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label); } - gtk_container_add(GTK_CONTAINER(window), notebook); + gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0); + + statbar = perf_gtk__setup_statusbar(); + gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0); + + gtk_container_add(GTK_CONTAINER(window), vbox); gtk_widget_show_all(window); diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 34fbca6d48a2..206167868c5f 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -10,6 +10,8 @@ struct perf_gtk_context { GtkWidget *main_window; + GtkWidget *statbar; + guint statbar_ctx_id; }; extern struct perf_gtk_context *pgctx; -- cgit v1.2.3 From a6b702c117f839023814c1e03453c701d26de522 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 29 May 2012 13:23:01 +0900 Subject: perf ui/gtk: Add GTK info_bar widget to browser window The GtkInfoBar is a modern UI component to display messages without bothering the main window. It'll be used for showing a warning message. As the GtkInfoBar requires 2.18 (or newer) version of GTK+ library, add availability check to Makefile too. Suggested-by: Sunjin Yang Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338265382-6872-7-git-send-email-namhyung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 3 +++ tools/perf/config/feature-tests.mak | 13 +++++++++++++ tools/perf/ui/gtk/browser.c | 33 +++++++++++++++++++++++++++++++++ tools/perf/ui/gtk/gtk.h | 12 ++++++++++++ 4 files changed, 61 insertions(+) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index c0ee917ae8ab..d698c118a602 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -523,6 +523,9 @@ else msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev); BASIC_CFLAGS += -DNO_GTK2_SUPPORT else + ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2)),y) + BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR + endif BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0) EXTLIBS += $(shell pkg-config --libs gtk+-2.0) LIB_OBJS += $(OUTPUT)ui/gtk/browser.o diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index d9084e03ce56..6c18785a6417 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -78,6 +78,19 @@ int main(int argc, char *argv[]) return 0; } endef + +define SOURCE_GTK2_INFOBAR +#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" +#include +#pragma GCC diagnostic error \"-Wstrict-prototypes\" + +int main(void) +{ + gtk_info_bar_new(); + + return 0; +} +endef endif ifndef NO_LIBPERL diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index ece360db8658..fd41e8d10337 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -122,6 +122,34 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) gtk_container_add(GTK_CONTAINER(window), view); } +#ifdef HAVE_GTK_INFO_BAR +static GtkWidget *perf_gtk__setup_info_bar(void) +{ + GtkWidget *info_bar; + GtkWidget *label; + GtkWidget *content_area; + + info_bar = gtk_info_bar_new(); + gtk_widget_set_no_show_all(info_bar, TRUE); + + label = gtk_label_new(""); + gtk_widget_show(label); + + content_area = gtk_info_bar_get_content_area(GTK_INFO_BAR(info_bar)); + gtk_container_add(GTK_CONTAINER(content_area), label); + + gtk_info_bar_add_button(GTK_INFO_BAR(info_bar), GTK_STOCK_OK, + GTK_RESPONSE_OK); + g_signal_connect(info_bar, "response", + G_CALLBACK(gtk_widget_hide), NULL); + + pgctx->info_bar = info_bar; + pgctx->message_label = label; + + return info_bar; +} +#endif + static GtkWidget *perf_gtk__setup_statusbar(void) { GtkWidget *stbar; @@ -145,6 +173,7 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, struct perf_evsel *pos; GtkWidget *vbox; GtkWidget *notebook; + GtkWidget *info_bar; GtkWidget *statbar; GtkWidget *window; @@ -189,6 +218,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0); + info_bar = perf_gtk__setup_info_bar(); + if (info_bar) + gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0); + statbar = perf_gtk__setup_statusbar(); gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0); diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 206167868c5f..a4d0f2b4a2dc 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -10,6 +10,11 @@ struct perf_gtk_context { GtkWidget *main_window; + +#ifdef HAVE_GTK_INFO_BAR + GtkWidget *info_bar; + GtkWidget *message_label; +#endif GtkWidget *statbar; guint statbar_ctx_id; }; @@ -24,4 +29,11 @@ static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx) struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window); int perf_gtk__deactivate_context(struct perf_gtk_context **ctx); +#ifndef HAVE_GTK_INFO_BAR +static inline GtkWidget *perf_gtk__setup_info_bar(void) +{ + return NULL; +} +#endif + #endif /* _PERF_GTK_H_ */ -- cgit v1.2.3 From e078ba14dff5c315ce19a978574883f417d2cfa0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 29 May 2012 13:23:02 +0900 Subject: perf ui/gtk: Use struct perf_error_ops Define and use perf_gtk_eops to provide a GTK2 message dialog for error reporting and a info_bar for warning. As GtkInfoBar requires recent GTK+ libraries, provides a fallback implementation using statusbar widget too. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338265382-6872-8-git-send-email-namhyung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/setup.c | 5 +++ tools/perf/ui/gtk/util.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index 829529957766..92879ce61e2f 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -1,12 +1,17 @@ #include "gtk.h" #include "../../util/cache.h" +#include "../../util/debug.h" + +extern struct perf_error_ops perf_gtk_eops; int perf_gtk__init(void) { + perf_error__register(&perf_gtk_eops); return gtk_init_check(NULL, NULL) ? 0 : -1; } void perf_gtk__exit(bool wait_for_ok __used) { + perf_error__unregister(&perf_gtk_eops); gtk_main_quit(); } diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index 6fe13fdc513e..0ead373c0dfb 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -2,6 +2,8 @@ #include "../../util/debug.h" #include "gtk.h" +#include + struct perf_gtk_context *pgctx; @@ -26,6 +28,90 @@ int perf_gtk__deactivate_context(struct perf_gtk_context **ctx) return 0; } +static int perf_gtk__error(const char *format, va_list args) +{ + char *msg; + GtkWidget *dialog; + + if (!perf_gtk__is_active_context(pgctx) || + vasprintf(&msg, format, args) < 0) { + fprintf(stderr, "Error:\n"); + vfprintf(stderr, format, args); + fprintf(stderr, "\n"); + return -1; + } + + dialog = gtk_message_dialog_new_with_markup(GTK_WINDOW(pgctx->main_window), + GTK_DIALOG_DESTROY_WITH_PARENT, + GTK_MESSAGE_ERROR, + GTK_BUTTONS_CLOSE, + "Error\n\n%s", msg); + gtk_dialog_run(GTK_DIALOG(dialog)); + + gtk_widget_destroy(dialog); + free(msg); + return 0; +} + +#ifdef HAVE_GTK_INFO_BAR +static int perf_gtk__warning_info_bar(const char *format, va_list args) +{ + char *msg; + + if (!perf_gtk__is_active_context(pgctx) || + vasprintf(&msg, format, args) < 0) { + fprintf(stderr, "Warning:\n"); + vfprintf(stderr, format, args); + fprintf(stderr, "\n"); + return -1; + } + + gtk_label_set_text(GTK_LABEL(pgctx->message_label), msg); + gtk_info_bar_set_message_type(GTK_INFO_BAR(pgctx->info_bar), + GTK_MESSAGE_WARNING); + gtk_widget_show(pgctx->info_bar); + + free(msg); + return 0; +} +#else +static int perf_gtk__warning_statusbar(const char *format, va_list args) +{ + char *msg, *p; + + if (!perf_gtk__is_active_context(pgctx) || + vasprintf(&msg, format, args) < 0) { + fprintf(stderr, "Warning:\n"); + vfprintf(stderr, format, args); + fprintf(stderr, "\n"); + return -1; + } + + gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar), + pgctx->statbar_ctx_id); + + /* Only first line can be displayed */ + p = strchr(msg, '\n'); + if (p) + *p = '\0'; + + gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar), + pgctx->statbar_ctx_id, msg); + + free(msg); + return 0; +} +#endif + +struct perf_error_ops perf_gtk_eops = { + .error = perf_gtk__error, +#ifdef HAVE_GTK_INFO_BAR + .warning = perf_gtk__warning_info_bar, +#else + .warning = perf_gtk__warning_statusbar, +#endif +}; + /* * FIXME: Functions below should be implemented properly. * For now, just add stubs for NO_NEWT=1 build. -- cgit v1.2.3 From cb1a28a0cbf9dac5a7a0ca02ebebc12db260d2f8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Jun 2012 18:23:31 -0300 Subject: perf lib: Introduce rtrim Remove the trailing whitespaces. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-8bxozh5lyixgjmziqaxo9675@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/string.c | 22 ++++++++++++++++++++++ tools/perf/util/util.h | 2 ++ 2 files changed, 24 insertions(+) diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index d5836382ff2c..199bc4d8905d 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -313,3 +313,25 @@ int strtailcmp(const char *s1, const char *s2) return 0; } +/** + * rtrim - Removes trailing whitespace from @s. + * @s: The string to be stripped. + * + * Note that the first trailing whitespace is replaced with a %NUL-terminator + * in the given string @s. Returns @s. + */ +char *rtrim(char *s) +{ + size_t size = strlen(s); + char *end; + + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return s; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 2daaedb83d84..b13c7331eaf8 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -264,4 +264,6 @@ bool is_power_of_2(unsigned long n) size_t hex_width(u64 v); +char *rtrim(char *s); + #endif -- cgit v1.2.3 From aff3f3f68ae6002a30543726b2ae48cafce109e6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Jun 2012 19:31:28 -0300 Subject: perf hists browser: Implement printing snapshots to files To avoid having to resort to --stdio, that expands everything, instead allow the user to go on expanding the relevant callchains and then press 'P' to print that view. As the hists browser is used for both static (report) and dynamic (top) views, it prints to a 'perf.hists.N' sequence, i.e. multiple snapshots can be taken in report and top. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-wr9xx4ba0utrynu5j6wotd79@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 195 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 53f6697d014e..f556e5f6388b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -23,6 +23,7 @@ struct hist_browser { struct hists *hists; struct hist_entry *he_selection; struct map_symbol *selection; + int print_seq; bool has_symbols; }; @@ -800,6 +801,196 @@ do_offset: } } +static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *browser, + struct callchain_node *chain_node, + u64 total, int level, + FILE *fp) +{ + struct rb_node *node; + int offset = level * LEVEL_OFFSET_STEP; + u64 new_total, remaining; + int printed = 0; + + if (callchain_param.mode == CHAIN_GRAPH_REL) + new_total = chain_node->children_hit; + else + new_total = total; + + remaining = new_total; + node = rb_first(&chain_node->rb_root); + while (node) { + struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); + struct rb_node *next = rb_next(node); + u64 cumul = callchain_cumul_hits(child); + struct callchain_list *chain; + char folded_sign = ' '; + int first = true; + int extra_offset = 0; + + remaining -= cumul; + + list_for_each_entry(chain, &child->val, list) { + char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; + const char *str; + bool was_first = first; + + if (first) + first = false; + else + extra_offset = LEVEL_OFFSET_STEP; + + folded_sign = callchain_list__folded(chain); + + alloc_str = NULL; + str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + if (was_first) { + double percent = cumul * 100.0 / new_total; + + if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0) + str = "Not enough memory!"; + else + str = alloc_str; + } + + printed += fprintf(fp, "%*s%c %s\n", offset + extra_offset, " ", folded_sign, str); + free(alloc_str); + if (folded_sign == '+') + break; + } + + if (folded_sign == '-') { + const int new_level = level + (extra_offset ? 2 : 1); + printed += hist_browser__fprintf_callchain_node_rb_tree(browser, child, new_total, + new_level, fp); + } + + node = next; + } + + return printed; +} + +static int hist_browser__fprintf_callchain_node(struct hist_browser *browser, + struct callchain_node *node, + int level, FILE *fp) +{ + struct callchain_list *chain; + int offset = level * LEVEL_OFFSET_STEP; + char folded_sign = ' '; + int printed = 0; + + list_for_each_entry(chain, &node->val, list) { + char ipstr[BITS_PER_LONG / 4 + 1], *s; + + folded_sign = callchain_list__folded(chain); + s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + printed += fprintf(fp, "%*s%c %s\n", offset, " ", folded_sign, s); + } + + if (folded_sign == '-') + printed += hist_browser__fprintf_callchain_node_rb_tree(browser, node, + browser->hists->stats.total_period, + level + 1, fp); + return printed; +} + +static int hist_browser__fprintf_callchain(struct hist_browser *browser, + struct rb_root *chain, int level, FILE *fp) +{ + struct rb_node *nd; + int printed = 0; + + for (nd = rb_first(chain); nd; nd = rb_next(nd)) { + struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); + + printed += hist_browser__fprintf_callchain_node(browser, node, level, fp); + } + + return printed; +} + +static int hist_browser__fprintf_entry(struct hist_browser *browser, + struct hist_entry *he, FILE *fp) +{ + char s[8192]; + double percent; + int printed = 0; + char folded_sign = ' '; + + if (symbol_conf.use_callchain) + folded_sign = hist_entry__folded(he); + + hist_entry__snprintf(he, s, sizeof(s), browser->hists); + percent = (he->period * 100.0) / browser->hists->stats.total_period; + + if (symbol_conf.use_callchain) + printed += fprintf(fp, "%c ", folded_sign); + + printed += fprintf(fp, " %5.2f%%", percent); + + if (symbol_conf.show_nr_samples) + printed += fprintf(fp, " %11u", he->nr_events); + + if (symbol_conf.show_total_period) + printed += fprintf(fp, " %12" PRIu64, he->period); + + printed += fprintf(fp, "%s\n", rtrim(s)); + + if (folded_sign == '-') + printed += hist_browser__fprintf_callchain(browser, &he->sorted_chain, 1, fp); + + return printed; +} + +static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) +{ + struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries)); + int printed = 0; + + while (nd) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + printed += hist_browser__fprintf_entry(browser, h, fp); + nd = hists__filter_entries(rb_next(nd)); + } + + return printed; +} + +static int hist_browser__dump(struct hist_browser *browser) +{ + char filename[64]; + FILE *fp; + + while (1) { + scnprintf(filename, sizeof(filename), "perf.hist.%d", browser->print_seq); + if (access(filename, F_OK)) + break; + /* + * XXX: Just an arbitrary lazy upper limit + */ + if (++browser->print_seq == 8192) { + ui_helpline__fpush("Too many perf.hist.N files, nothing written!"); + return -1; + } + } + + fp = fopen(filename, "w"); + if (fp == NULL) { + char bf[64]; + strerror_r(errno, bf, sizeof(bf)); + ui_helpline__fpush("Couldn't write to %s: %s", filename, bf); + return -1; + } + + ++browser->print_seq; + hist_browser__fprintf(browser, fp); + fclose(fp); + ui_helpline__fpush("%s written!", filename); + + return 0; +} + static struct hist_browser *hist_browser__new(struct hists *hists) { struct hist_browser *browser = zalloc(sizeof(*browser)); @@ -937,6 +1128,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, browser->selection->map->dso->annotate_warned) continue; goto do_annotate; + case 'P': + hist_browser__dump(browser); + continue; case 'd': goto zoom_dso; case 't': @@ -969,6 +1163,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "E Expand all callchains\n" "d Zoom into current DSO\n" "t Zoom into current Thread\n" + "P Print histograms to perf.hist.N\n" "/ Filter symbol by name"); continue; case K_ENTER: -- cgit v1.2.3 From 27f18617b01dbbc928e9bd3731d1766222fb7e0d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Jun 2012 13:33:09 -0300 Subject: perf evsel: Carve out event modifier formatting From perf_evsel__hw_name, so that we can use it for the other kinds of events (tracepoints, software, hw cache, etc). Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-9gmd5wewsrvtny8tzxjfp471@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9f6cebd798ee..dab893804a14 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -86,16 +86,15 @@ const char *__perf_evsel__hw_name(u64 config) return "unknown-hardware"; } -static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) +static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size) { - int colon = 0; + int colon = 0, r = 0; struct perf_event_attr *attr = &evsel->attr; - int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(attr->config)); bool exclude_guest_default = false; #define MOD_PRINT(context, mod) do { \ if (!attr->exclude_##context) { \ - if (!colon) colon = r++; \ + if (!colon) colon = ++r; \ r += scnprintf(bf + r, size - r, "%c", mod); \ } } while(0) @@ -108,7 +107,7 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) if (attr->precise_ip) { if (!colon) - colon = r++; + colon = ++r; r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp"); exclude_guest_default = true; } @@ -119,10 +118,16 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) } #undef MOD_PRINT if (colon) - bf[colon] = ':'; + bf[colon - 1] = ':'; return r; } +static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config)); + return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); +} + int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size) { int ret; -- cgit v1.2.3 From 0b668bc9a74ce1bd3b8c5fd93e8d85ed955e11fe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Jun 2012 14:08:07 -0300 Subject: perf tools: Reconstruct hw cache event with modifiers from perf_event_attr [root@sandy ~]# perf record -a -e dTLB-load-misses:u usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.486 MB perf.data (~21216 samples) ] Before: [root@sandy ~]# perf evlist dTLB-load-misses [root@sandy ~]# After: [root@sandy ~]# perf evlist dTLB-load-misses:u [root@sandy ~]# Ditto for other tools. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-7x1b0e6jthkr93lfjzsuakk5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 104 +++++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 16 +++++- tools/perf/util/parse-events.c | 115 ++++++----------------------------------- 3 files changed, 134 insertions(+), 101 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dab893804a14..47f1fe2feab8 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -128,6 +128,105 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } +const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] + [PERF_EVSEL__MAX_ALIASES] = { + { "L1-dcache", "l1-d", "l1d", "L1-data", }, + { "L1-icache", "l1-i", "l1i", "L1-instruction", }, + { "LLC", "L2", }, + { "dTLB", "d-tlb", "Data-TLB", }, + { "iTLB", "i-tlb", "Instruction-TLB", }, + { "branch", "branches", "bpu", "btb", "bpc", }, + { "node", }, +}; + +const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_EVSEL__MAX_ALIASES] = { + { "load", "loads", "read", }, + { "store", "stores", "write", }, + { "prefetch", "prefetches", "speculative-read", "speculative-load", }, +}; + +const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] + [PERF_EVSEL__MAX_ALIASES] = { + { "refs", "Reference", "ops", "access", }, + { "misses", "miss", }, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x +#define CACHE_READ (1 << C(OP_READ)) +#define CACHE_WRITE (1 << C(OP_WRITE)) +#define CACHE_PREFETCH (1 << C(OP_PREFETCH)) +#define COP(x) (1 << x) + +/* + * cache operartion stat + * L1I : Read and prefetch only + * ITLB and BPU : Read-only + */ +static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = { + [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), + [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(ITLB)] = (CACHE_READ), + [C(BPU)] = (CACHE_READ), + [C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), +}; + +bool perf_evsel__is_cache_op_valid(u8 type, u8 op) +{ + if (perf_evsel__hw_cache_stat[type] & COP(op)) + return true; /* valid */ + else + return false; /* invalid */ +} + +int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, + char *bf, size_t size) +{ + if (result) { + return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0], + perf_evsel__hw_cache_op[op][0], + perf_evsel__hw_cache_result[result][0]); + } + + return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0], + perf_evsel__hw_cache_op[op][1]); +} + +int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) +{ + u8 op, result, type = (config >> 0) & 0xff; + const char *err = "unknown-ext-hardware-cache-type"; + + if (type > PERF_COUNT_HW_CACHE_MAX) + goto out_err; + + op = (config >> 8) & 0xff; + err = "unknown-ext-hardware-cache-op"; + if (op > PERF_COUNT_HW_CACHE_OP_MAX) + goto out_err; + + result = (config >> 16) & 0xff; + err = "unknown-ext-hardware-cache-result"; + if (result > PERF_COUNT_HW_CACHE_RESULT_MAX) + goto out_err; + + err = "invalid-cache"; + if (!perf_evsel__is_cache_op_valid(type, op)) + goto out_err; + + return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size); +out_err: + return scnprintf(bf, size, "%s", err); +} + +static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size); + return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); +} + int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size) { int ret; @@ -140,6 +239,11 @@ int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size) case PERF_TYPE_HARDWARE: ret = perf_evsel__hw_name(evsel, bf, size); break; + + case PERF_TYPE_HW_CACHE: + ret = perf_evsel__hw_cache_name(evsel, bf, size); + break; + default: /* * FIXME diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4ba8b564e6f4..5bf946a05a6b 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -83,7 +83,21 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts, struct perf_evsel *first); -const char* __perf_evsel__hw_name(u64 config); +bool perf_evsel__is_cache_op_valid(u8 type, u8 op); + +#define PERF_EVSEL__MAX_ALIASES 8 + +extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] + [PERF_EVSEL__MAX_ALIASES]; +extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_EVSEL__MAX_ALIASES]; +const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] + [PERF_EVSEL__MAX_ALIASES]; +int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, + char *bf, size_t size); +int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size); + +const char *__perf_evsel__hw_name(u64 config); int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 05dbc8b3c767..c8f8cf4a6920 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -74,51 +74,6 @@ static const char *sw_event_names[PERF_COUNT_SW_MAX] = { "emulation-faults", }; -#define MAX_ALIASES 8 - -static const char *hw_cache[PERF_COUNT_HW_CACHE_MAX][MAX_ALIASES] = { - { "L1-dcache", "l1-d", "l1d", "L1-data", }, - { "L1-icache", "l1-i", "l1i", "L1-instruction", }, - { "LLC", "L2", }, - { "dTLB", "d-tlb", "Data-TLB", }, - { "iTLB", "i-tlb", "Instruction-TLB", }, - { "branch", "branches", "bpu", "btb", "bpc", }, - { "node", }, -}; - -static const char *hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][MAX_ALIASES] = { - { "load", "loads", "read", }, - { "store", "stores", "write", }, - { "prefetch", "prefetches", "speculative-read", "speculative-load", }, -}; - -static const char *hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] - [MAX_ALIASES] = { - { "refs", "Reference", "ops", "access", }, - { "misses", "miss", }, -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x -#define CACHE_READ (1 << C(OP_READ)) -#define CACHE_WRITE (1 << C(OP_WRITE)) -#define CACHE_PREFETCH (1 << C(OP_PREFETCH)) -#define COP(x) (1 << x) - -/* - * cache operartion stat - * L1I : Read and prefetch only - * ITLB and BPU : Read-only - */ -static unsigned long hw_cache_stat[C(MAX)] = { - [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), - [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), - [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), - [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), - [C(ITLB)] = (CACHE_READ), - [C(BPU)] = (CACHE_READ), - [C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), -}; - #define for_each_subsystem(sys_dir, sys_dirent, sys_next) \ while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \ if (sys_dirent.d_type == DT_DIR && \ @@ -236,30 +191,6 @@ static const char *tracepoint_id_to_name(u64 config) return buf; } -static int is_cache_op_valid(u8 cache_type, u8 cache_op) -{ - if (hw_cache_stat[cache_type] & COP(cache_op)) - return 1; /* valid */ - else - return 0; /* invalid */ -} - -static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result) -{ - static char name[50]; - - if (cache_result) { - sprintf(name, "%s-%s-%s", hw_cache[cache_type][0], - hw_cache_op[cache_op][0], - hw_cache_result[cache_result][0]); - } else { - sprintf(name, "%s-%s", hw_cache[cache_type][0], - hw_cache_op[cache_op][1]); - } - - return name; -} - const char *event_type(int type) { switch (type) { @@ -287,7 +218,7 @@ const char *event_name(struct perf_evsel *evsel) u64 config = evsel->attr.config; int type = evsel->attr.type; - if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE) { + if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) { /* * XXX minimal fix, see comment on perf_evsen__name, this static buffer * will go away together with event_name in the next devel cycle. @@ -316,26 +247,9 @@ const char *__event_name(int type, u64 config) case PERF_TYPE_HARDWARE: return __perf_evsel__hw_name(config); - case PERF_TYPE_HW_CACHE: { - u8 cache_type, cache_op, cache_result; - - cache_type = (config >> 0) & 0xff; - if (cache_type > PERF_COUNT_HW_CACHE_MAX) - return "unknown-ext-hardware-cache-type"; - - cache_op = (config >> 8) & 0xff; - if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX) - return "unknown-ext-hardware-cache-op"; - - cache_result = (config >> 16) & 0xff; - if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) - return "unknown-ext-hardware-cache-result"; - - if (!is_cache_op_valid(cache_type, cache_op)) - return "invalid-cache"; - - return event_cache_name(cache_type, cache_op, cache_result); - } + case PERF_TYPE_HW_CACHE: + __perf_evsel__hw_cache_name(config, buf, sizeof(buf)); + return buf; case PERF_TYPE_SOFTWARE: if (config < PERF_COUNT_SW_MAX && sw_event_names[config]) @@ -379,13 +293,13 @@ static int add_event(struct list_head **_list, int *idx, return 0; } -static int parse_aliases(char *str, const char *names[][MAX_ALIASES], int size) +static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) { int i, j; int n, longest = -1; for (i = 0; i < size; i++) { - for (j = 0; j < MAX_ALIASES && names[i][j]; j++) { + for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) { n = strlen(names[i][j]); if (n > longest && !strncasecmp(str, names[i][j], n)) longest = n; @@ -410,7 +324,7 @@ int parse_events_add_cache(struct list_head **list, int *idx, * No fallback - if we cannot get a clear cache type * then bail out: */ - cache_type = parse_aliases(type, hw_cache, + cache_type = parse_aliases(type, perf_evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX); if (cache_type == -1) return -EINVAL; @@ -423,18 +337,18 @@ int parse_events_add_cache(struct list_head **list, int *idx, snprintf(name + n, MAX_NAME_LEN - n, "-%s\n", str); if (cache_op == -1) { - cache_op = parse_aliases(str, hw_cache_op, + cache_op = parse_aliases(str, perf_evsel__hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); if (cache_op >= 0) { - if (!is_cache_op_valid(cache_type, cache_op)) + if (!perf_evsel__is_cache_op_valid(cache_type, cache_op)) return -EINVAL; continue; } } if (cache_result == -1) { - cache_result = parse_aliases(str, hw_cache_result, - PERF_COUNT_HW_CACHE_RESULT_MAX); + cache_result = parse_aliases(str, perf_evsel__hw_cache_result, + PERF_COUNT_HW_CACHE_RESULT_MAX); if (cache_result >= 0) continue; } @@ -970,16 +884,17 @@ void print_events_type(u8 type) int print_hwcache_events(const char *event_glob) { unsigned int type, op, i, printed = 0; + char name[64]; for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ - if (!is_cache_op_valid(type, op)) + if (!perf_evsel__is_cache_op_valid(type, op)) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - char *name = event_cache_name(type, op, i); - + __perf_evsel__hw_cache_type_op_res_name(type, op, i, + name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; -- cgit v1.2.3 From 335c2f5d25319b208fb8b444e6f3099a806a33bf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Jun 2012 14:36:20 -0300 Subject: perf tools: Reconstruct sw event with modifiers from perf_event_attr [root@sandy ~]# perf record -e task-clock:u -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.482 MB perf.data (~21073 samples) ] [root@sandy ~]# Before: [root@sandy ~]# perf evlist task-clock [root@sandy ~]# After: [root@sandy ~]# perf evlist task-clock:u [root@sandy ~]# Ditto for other tools. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-97ltkmj7v23kyhflltf6iz5n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 29 +++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 2 ++ tools/perf/util/parse-events.c | 19 +++---------------- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 47f1fe2feab8..2da047331173 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -128,6 +128,31 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } +static const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { + "cpu-clock", + "task-clock", + "page-faults", + "context-switches", + "CPU-migrations", + "minor-faults", + "major-faults", + "alignment-faults", + "emulation-faults", +}; + +const char *__perf_evsel__sw_name(u64 config) +{ + if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config]) + return perf_evsel__sw_names[config]; + return "unknown-software"; +} + +static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config)); + return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); +} + const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] [PERF_EVSEL__MAX_ALIASES] = { { "L1-dcache", "l1-d", "l1d", "L1-data", }, @@ -244,6 +269,10 @@ int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size) ret = perf_evsel__hw_cache_name(evsel, bf, size); break; + case PERF_TYPE_SOFTWARE: + ret = perf_evsel__sw_name(evsel, bf, size); + break; + default: /* * FIXME diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 5bf946a05a6b..9ae64d9622bc 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -98,6 +98,8 @@ int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size); const char *__perf_evsel__hw_name(u64 config); +const char *__perf_evsel__sw_name(u64 config); + int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c8f8cf4a6920..641c4ac8a838 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -62,18 +62,6 @@ static struct event_symbol event_symbols[] = { #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) -static const char *sw_event_names[PERF_COUNT_SW_MAX] = { - "cpu-clock", - "task-clock", - "page-faults", - "context-switches", - "CPU-migrations", - "minor-faults", - "major-faults", - "alignment-faults", - "emulation-faults", -}; - #define for_each_subsystem(sys_dir, sys_dirent, sys_next) \ while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \ if (sys_dirent.d_type == DT_DIR && \ @@ -218,7 +206,8 @@ const char *event_name(struct perf_evsel *evsel) u64 config = evsel->attr.config; int type = evsel->attr.type; - if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) { + if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE || + type == PERF_TYPE_SOFTWARE || type == PERF_TYPE_HW_CACHE) { /* * XXX minimal fix, see comment on perf_evsen__name, this static buffer * will go away together with event_name in the next devel cycle. @@ -252,9 +241,7 @@ const char *__event_name(int type, u64 config) return buf; case PERF_TYPE_SOFTWARE: - if (config < PERF_COUNT_SW_MAX && sw_event_names[config]) - return sw_event_names[config]; - return "unknown-software"; + return __perf_evsel__sw_name(config); case PERF_TYPE_TRACEPOINT: return tracepoint_id_to_name(config); -- cgit v1.2.3 From a446083604fe2bafe0f46b1e95b22f7b06e3a63f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Jun 2012 10:29:12 -0300 Subject: perf evsel: Handle all event types in perf_evsel__name Now to convert all event_name users to perf_evsel__name. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-buuz0j0gynseglxa76r01rdn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 24 +++++++++++------------- tools/perf/util/parse-events.c | 21 +++------------------ 2 files changed, 14 insertions(+), 31 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2da047331173..2ddc81271855 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -252,6 +252,11 @@ static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); } +static int perf_evsel__tracepoint_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + return scnprintf(bf, size, "%s", evsel->name ?: "unknown tracepoint"); +} + int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size) { int ret; @@ -273,20 +278,13 @@ int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size) ret = perf_evsel__sw_name(evsel, bf, size); break; + case PERF_TYPE_TRACEPOINT: + ret = perf_evsel__tracepoint_name(evsel, bf, size); + break; + default: - /* - * FIXME - * - * This is the minimal perf_evsel__name so that we can - * reconstruct event names taking into account event modifiers. - * - * The old event_name uses it now for raw anr hw events, so that - * we don't drag all the parsing stuff into the python binding. - * - * On the next devel cycle the rest of the event naming will be - * brought here. - */ - return 0; + ret = scnprintf(bf, size, "%s", "unknown attr type"); + break; } return ret; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 641c4ac8a838..d73690b11242 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -203,24 +203,9 @@ const char *event_type(int type) const char *event_name(struct perf_evsel *evsel) { - u64 config = evsel->attr.config; - int type = evsel->attr.type; - - if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE || - type == PERF_TYPE_SOFTWARE || type == PERF_TYPE_HW_CACHE) { - /* - * XXX minimal fix, see comment on perf_evsen__name, this static buffer - * will go away together with event_name in the next devel cycle. - */ - static char bf[128]; - perf_evsel__name(evsel, bf, sizeof(bf)); - return bf; - } - - if (evsel->name) - return evsel->name; - - return __event_name(type, config); + static char bf[128]; + perf_evsel__name(evsel, bf, sizeof(bf)); + return bf; } const char *__event_name(int type, u64 config) -- cgit v1.2.3 From 7289f83cceb437ca56c77eb45b8b1cda15e2e476 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Jun 2012 12:34:58 -0300 Subject: perf tools: Move all users of event_name to perf_evsel__name So that we don't use global variables that could make us misreport event names when having a multi window top, for instance. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-mccancovi1u0wdkg8ncth509@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-evlist.c | 2 +- tools/perf/builtin-record.c | 4 ++-- tools/perf/builtin-report.c | 6 +++--- tools/perf/builtin-stat.c | 12 ++++++------ tools/perf/builtin-test.c | 2 +- tools/perf/builtin-top.c | 10 +++++----- tools/perf/ui/browsers/hists.c | 15 ++++----------- tools/perf/ui/gtk/browser.c | 2 +- tools/perf/util/evsel.c | 26 +++++++++++++------------- tools/perf/util/evsel.h | 2 +- tools/perf/util/header.c | 2 +- tools/perf/util/parse-events.c | 7 ------- tools/perf/util/parse-events.h | 1 - tools/perf/util/session.c | 2 +- tools/perf/util/top.c | 2 +- 15 files changed, 40 insertions(+), 55 deletions(-) diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index acd78dc28341..0dd5a058f766 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -60,7 +60,7 @@ static int __cmd_evlist(const char *input_name, struct perf_attr_details *detail list_for_each_entry(pos, &session->evlist->entries, node) { bool first = true; - printf("%s", event_name(pos)); + printf("%s", perf_evsel__name(pos)); if (details->verbose || details->freq) { comma_printf(&first, " sample_freq=%" PRIu64, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f95840d04e4c..f5a6452931e6 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -265,7 +265,7 @@ try_again: if (err == ENOENT) { ui__error("The %s event is not supported.\n", - event_name(pos)); + perf_evsel__name(pos)); exit(EXIT_FAILURE); } @@ -916,7 +916,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) usage_with_options(record_usage, record_options); list_for_each_entry(pos, &evsel_list->entries, node) { - if (perf_header__push_event(pos->attr.config, event_name(pos))) + if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos))) goto out_free_fd; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25249f76329d..ea8ce8e1c0d8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -230,7 +230,7 @@ static int process_read_event(struct perf_tool *tool, struct perf_report *rep = container_of(tool, struct perf_report, tool); if (rep->show_threads) { - const char *name = evsel ? event_name(evsel) : "unknown"; + const char *name = evsel ? perf_evsel__name(evsel) : "unknown"; perf_read_values_add_value(&rep->show_threads_values, event->read.pid, event->read.tid, event->read.id, @@ -239,7 +239,7 @@ static int process_read_event(struct perf_tool *tool, } dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, - evsel ? event_name(evsel) : "FAIL", + evsel ? perf_evsel__name(evsel) : "FAIL", event->read.value); return 0; @@ -314,7 +314,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, list_for_each_entry(pos, &evlist->entries, node) { struct hists *hists = &pos->hists; - const char *evname = event_name(pos); + const char *evname = perf_evsel__name(pos); hists__fprintf_nr_sample_events(hists, evname, stdout); hists__fprintf(hists, NULL, false, true, 0, 0, stdout); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 262589991ea4..875bf2675326 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -391,7 +391,7 @@ static int read_counter_aggr(struct perf_evsel *counter) if (verbose) { fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - event_name(counter), count[0], count[1], count[2]); + perf_evsel__name(counter), count[0], count[1], count[2]); } /* @@ -496,7 +496,7 @@ static int run_perf_stat(int argc __used, const char **argv) errno == ENXIO) { if (verbose) ui__warning("%s event is not supported by the kernel.\n", - event_name(counter)); + perf_evsel__name(counter)); counter->supported = false; continue; } @@ -594,7 +594,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) csv_output ? 0 : -4, evsel_list->cpus->map[cpu], csv_sep); - fprintf(output, fmt, cpustr, msecs, csv_sep, event_name(evsel)); + fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel)); if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); @@ -792,7 +792,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) else cpu = 0; - fprintf(output, fmt, cpustr, avg, csv_sep, event_name(evsel)); + fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel)); if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); @@ -908,7 +908,7 @@ static void print_counter_aggr(struct perf_evsel *counter) counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep, csv_output ? 0 : -24, - event_name(counter)); + perf_evsel__name(counter)); if (counter->cgrp) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); @@ -961,7 +961,7 @@ static void print_counter(struct perf_evsel *counter) counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep, csv_output ? 0 : -24, - event_name(counter)); + perf_evsel__name(counter)); if (counter->cgrp) fprintf(output, "%s%s", diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 5a8727c08757..5ce30305462b 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -583,7 +583,7 @@ static int test__basic_mmap(void) if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) { pr_debug("expected %d %s events, got %d\n", expected_nr_events[evsel->idx], - event_name(evsel), nr_events[evsel->idx]); + perf_evsel__name(evsel), nr_events[evsel->idx]); goto out_munmap; } } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6bb0277b7dfe..8090a280578c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -245,7 +245,7 @@ static void perf_top__show_details(struct perf_top *top) if (notes->src == NULL) goto out_unlock; - printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name); + printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name); printf(" Events Pcnt (>=%d%%)\n", top->sym_pcnt_filter); more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx, @@ -408,7 +408,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries); if (top->evlist->nr_entries > 1) - fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top->sym_evsel)); + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", perf_evsel__name(top->sym_evsel)); fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); @@ -503,13 +503,13 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) fprintf(stderr, "\nAvailable events:"); list_for_each_entry(top->sym_evsel, &top->evlist->entries, node) - fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel)); + fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel)); prompt_integer(&counter, "Enter details event counter"); if (counter >= top->evlist->nr_entries) { top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); - fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel)); + fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel)); sleep(1); break; } @@ -960,7 +960,7 @@ try_again: if (err == ENOENT) { ui__error("The %s event is not supported.\n", - event_name(counter)); + perf_evsel__name(counter)); goto out_err; } else if (err == EMFILE) { ui__error("Too many events are opened.\n" diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f556e5f6388b..482f0517b61e 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1367,7 +1367,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node); bool current_entry = ui_browser__is_current_entry(browser, row); unsigned long nr_events = evsel->hists.stats.nr_events[PERF_RECORD_SAMPLE]; - const char *ev_name = event_name(evsel); + const char *ev_name = perf_evsel__name(evsel); char bf[256], unit; const char *warn = " "; size_t printed; @@ -1435,7 +1435,7 @@ browse_hists: */ if (timer) timer(arg); - ev_name = event_name(pos); + ev_name = perf_evsel__name(pos); key = perf_evsel__hists_browse(pos, nr_events, help, ev_name, true, timer, arg, delay_secs); @@ -1504,17 +1504,11 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, ui_helpline__push("Press ESC to exit"); list_for_each_entry(pos, &evlist->entries, node) { - const char *ev_name = event_name(pos); + const char *ev_name = perf_evsel__name(pos); size_t line_len = strlen(ev_name) + 7; if (menu.b.width < line_len) menu.b.width = line_len; - /* - * Cache the evsel name, tracepoints have a _high_ cost per - * event_name() call. - */ - if (pos->name == NULL) - pos->name = strdup(ev_name); } return perf_evsel_menu__run(&menu, evlist->nr_entries, help, timer, @@ -1525,11 +1519,10 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, void(*timer)(void *arg), void *arg, int delay_secs) { - if (evlist->nr_entries == 1) { struct perf_evsel *first = list_entry(evlist->entries.next, struct perf_evsel, node); - const char *ev_name = event_name(first); + const char *ev_name = perf_evsel__name(first); return perf_evsel__hists_browse(first, evlist->nr_entries, help, ev_name, false, timer, arg, delay_secs); diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index fd41e8d10337..ec12e0b4ded6 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -199,7 +199,7 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, list_for_each_entry(pos, &evlist->entries, node) { struct hists *hists = &pos->hists; - const char *evname = event_name(pos); + const char *evname = perf_evsel__name(pos); GtkWidget *scrolled_window; GtkWidget *tab_label; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2ddc81271855..236bdf25db6d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -252,42 +252,42 @@ static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); } -static int perf_evsel__tracepoint_name(struct perf_evsel *evsel, char *bf, size_t size) +const char *perf_evsel__name(struct perf_evsel *evsel) { - return scnprintf(bf, size, "%s", evsel->name ?: "unknown tracepoint"); -} + char bf[128]; -int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size) -{ - int ret; + if (evsel->name) + return evsel->name; switch (evsel->attr.type) { case PERF_TYPE_RAW: - ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config); + scnprintf(bf, sizeof(bf), "raw 0x%" PRIx64, evsel->attr.config); break; case PERF_TYPE_HARDWARE: - ret = perf_evsel__hw_name(evsel, bf, size); + perf_evsel__hw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_HW_CACHE: - ret = perf_evsel__hw_cache_name(evsel, bf, size); + perf_evsel__hw_cache_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_SOFTWARE: - ret = perf_evsel__sw_name(evsel, bf, size); + perf_evsel__sw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_TRACEPOINT: - ret = perf_evsel__tracepoint_name(evsel, bf, size); + scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint"); break; default: - ret = scnprintf(bf, size, "%s", "unknown attr type"); + scnprintf(bf, sizeof(bf), "%s", "unknown attr type"); break; } - return ret; + evsel->name = strdup(bf); + + return evsel->name ?: "unknown"; } void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts, diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 9ae64d9622bc..c936feb4e0a6 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -100,7 +100,7 @@ int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size); const char *__perf_evsel__hw_name(u64 config); const char *__perf_evsel__sw_name(u64 config); -int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size); +const char *perf_evsel__name(struct perf_evsel *evsel); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2dd5edf161b7..07c8f3792954 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -641,7 +641,7 @@ static int write_event_desc(int fd, struct perf_header *h __used, /* * write event string as passed on cmdline */ - ret = do_write_string(fd, event_name(attr)); + ret = do_write_string(fd, perf_evsel__name(attr)); if (ret < 0) return ret; /* diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d73690b11242..517e6473982c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -201,13 +201,6 @@ const char *event_type(int type) return "unknown"; } -const char *event_name(struct perf_evsel *evsel) -{ - static char bf[128]; - perf_evsel__name(evsel, bf, sizeof(bf)); - return bf; -} - const char *__event_name(int type, u64 config) { static char buf[32]; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 8cac57ab4ee6..d7eb070937c4 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -26,7 +26,6 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config); extern bool have_tracepoints(struct list_head *evlist); const char *event_type(int type); -const char *event_name(struct perf_evsel *event); extern const char *__event_name(int type, u64 config); extern int parse_events_option(const struct option *opt, const char *str, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2600916efa83..582ee38ed216 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1439,7 +1439,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) ret += hists__fprintf_nr_events(&session->hists, fp); list_for_each_entry(pos, &session->evlist->entries, node) { - ret += fprintf(fp, "%s stats:\n", event_name(pos)); + ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos)); ret += hists__fprintf_nr_events(&pos->hists, fp); } diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index abe0e8e95068..7eeebcee291c 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -65,7 +65,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) top->freq ? "Hz" : ""); } - ret += SNPRINTF(bf + ret, size - ret, "%s", event_name(top->sym_evsel)); + ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel)); ret += SNPRINTF(bf + ret, size - ret, "], "); -- cgit v1.2.3 From 5bff01f66db1753abcae03a06b21e56e7e9d9fa9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Jun 2012 13:35:44 -0300 Subject: perf script: Replace __event_name uses with perf_evsel__name No logic change, just remove one more user of __event_name(). Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-e4f0vuy3283hmzfjjvkgm7fo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8e395a538eb9..8f9f9b6140db 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -137,10 +137,11 @@ static const char *output_field2str(enum perf_output_field field) #define PRINT_FIELD(x) (output[attr->type].fields & PERF_OUTPUT_##x) -static int perf_event_attr__check_stype(struct perf_event_attr *attr, - u64 sample_type, const char *sample_msg, - enum perf_output_field field) +static int perf_evsel__check_stype(struct perf_evsel *evsel, + u64 sample_type, const char *sample_msg, + enum perf_output_field field) { + struct perf_event_attr *attr = &evsel->attr; int type = attr->type; const char *evname; @@ -148,7 +149,7 @@ static int perf_event_attr__check_stype(struct perf_event_attr *attr, return 0; if (output[type].user_set) { - evname = __event_name(attr->type, attr->config); + evname = perf_evsel__name(evsel); pr_err("Samples for '%s' event do not have %s attribute set. " "Cannot print '%s' field.\n", evname, sample_msg, output_field2str(field)); @@ -157,7 +158,7 @@ static int perf_event_attr__check_stype(struct perf_event_attr *attr, /* user did not ask for it explicitly so remove from the default list */ output[type].fields &= ~field; - evname = __event_name(attr->type, attr->config); + evname = perf_evsel__name(evsel); pr_debug("Samples for '%s' event do not have %s attribute set. " "Skipping '%s' field.\n", evname, sample_msg, output_field2str(field)); @@ -175,8 +176,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, return -EINVAL; if (PRINT_FIELD(IP)) { - if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP", - PERF_OUTPUT_IP)) + if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP", + PERF_OUTPUT_IP)) return -EINVAL; if (!no_callchain && @@ -185,8 +186,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, } if (PRINT_FIELD(ADDR) && - perf_event_attr__check_stype(attr, PERF_SAMPLE_ADDR, "ADDR", - PERF_OUTPUT_ADDR)) + perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", + PERF_OUTPUT_ADDR)) return -EINVAL; if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { @@ -208,18 +209,18 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, } if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && - perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID", - PERF_OUTPUT_TID|PERF_OUTPUT_PID)) + perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", + PERF_OUTPUT_TID|PERF_OUTPUT_PID)) return -EINVAL; if (PRINT_FIELD(TIME) && - perf_event_attr__check_stype(attr, PERF_SAMPLE_TIME, "TIME", - PERF_OUTPUT_TIME)) + perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME", + PERF_OUTPUT_TIME)) return -EINVAL; if (PRINT_FIELD(CPU) && - perf_event_attr__check_stype(attr, PERF_SAMPLE_CPU, "CPU", - PERF_OUTPUT_CPU)) + perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU", + PERF_OUTPUT_CPU)) return -EINVAL; return 0; @@ -258,9 +259,10 @@ static int perf_session__check_output_opt(struct perf_session *session) static void print_sample_start(struct perf_sample *sample, struct thread *thread, - struct perf_event_attr *attr) + struct perf_evsel *evsel) { int type; + struct perf_event_attr *attr = &evsel->attr; struct event_format *event; const char *evname = NULL; unsigned long secs; @@ -305,7 +307,7 @@ static void print_sample_start(struct perf_sample *sample, if (event) evname = event->name; } else - evname = __event_name(attr->type, attr->config); + evname = perf_evsel__name(evsel); printf("%s: ", evname ? evname : "[unknown]"); } @@ -412,7 +414,7 @@ static void process_event(union perf_event *event __unused, if (output[attr->type].fields == 0) return; - print_sample_start(sample, thread, attr); + print_sample_start(sample, thread, evsel); if (is_bts_event(attr)) { print_sample_bts(event, sample, evsel, machine, thread); -- cgit v1.2.3 From 22c8b84320ecf9ecbb6587d46a259b828e9ece5e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Jun 2012 13:55:13 -0300 Subject: perf tools: Don't access evsel->name directly One needs to use perf_evsel__name() so that if needed the name gets synthesized and stored in evsel->name, from where perf_evsel__name() will serve from them on. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ml7zbenjmri9bghmrea0jm0d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- tools/perf/util/parse-events-test.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b125e07eb399..9fe77b185338 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1601,7 +1601,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, if (thread == NULL) { pr_debug("problem processing %s event, skipping it.\n", - evsel->name); + perf_evsel__name(evsel)); return -1; } diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 76b98e2a587d..d0cf7c1ed068 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -418,14 +418,14 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); - TEST_ASSERT_VAL("wrong name", !strcmp(evsel->name, "krava")); + TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava")); /* cpu/config=2/" */ evsel = list_entry(evsel->node.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config); - TEST_ASSERT_VAL("wrong name", !strcmp(evsel->name, "raw 0x2")); + TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "raw 0x2")); return 0; } -- cgit v1.2.3 From 9db1763c72b1548626ae9d634015de4f07ef624f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Jun 2012 13:45:00 -0300 Subject: perf tools: Remove __event_name Not needed anymore, the parsing code can just leave evsel->name as NULL and the first call to perf_evsel__name() will do exactly what was being pre-cached using __event_name(). Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cn2eiijcinnc97buod8cs34m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 61 +++++------------------------------------- tools/perf/util/parse-events.h | 1 - 2 files changed, 6 insertions(+), 56 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 517e6473982c..eacf932a36a0 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -161,24 +161,6 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) return NULL; } -#define TP_PATH_LEN (MAX_EVENT_LENGTH * 2 + 1) -static const char *tracepoint_id_to_name(u64 config) -{ - static char buf[TP_PATH_LEN]; - struct tracepoint_path *path; - - path = tracepoint_id_to_path(config); - if (path) { - snprintf(buf, TP_PATH_LEN, "%s:%s", path->system, path->name); - free(path->name); - free(path->system); - free(path); - } else - snprintf(buf, TP_PATH_LEN, "%s:%s", "unknown", "unknown"); - - return buf; -} - const char *event_type(int type) { switch (type) { @@ -201,36 +183,6 @@ const char *event_type(int type) return "unknown"; } -const char *__event_name(int type, u64 config) -{ - static char buf[32]; - - if (type == PERF_TYPE_RAW) { - sprintf(buf, "raw 0x%" PRIx64, config); - return buf; - } - - switch (type) { - case PERF_TYPE_HARDWARE: - return __perf_evsel__hw_name(config); - - case PERF_TYPE_HW_CACHE: - __perf_evsel__hw_cache_name(config, buf, sizeof(buf)); - return buf; - - case PERF_TYPE_SOFTWARE: - return __perf_evsel__sw_name(config); - - case PERF_TYPE_TRACEPOINT: - return tracepoint_id_to_name(config); - - default: - break; - } - - return "unknown"; -} - static int add_event(struct list_head **_list, int *idx, struct perf_event_attr *attr, char *name) { @@ -252,7 +204,8 @@ static int add_event(struct list_head **_list, int *idx, return -ENOMEM; } - evsel->name = strdup(name); + if (name) + evsel->name = strdup(name); list_add_tail(&evsel->node, list); *_list = list; return 0; @@ -545,8 +498,7 @@ int parse_events_add_numeric(struct list_head **list, int *idx, config_attr(&attr, head_config, 1)) return -EINVAL; - return add_event(list, idx, &attr, - (char *) __event_name(type, config)); + return add_event(list, idx, &attr, NULL); } static int parse_events__is_name_term(struct parse_events__term *term) @@ -554,8 +506,7 @@ static int parse_events__is_name_term(struct parse_events__term *term) return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; } -static char *pmu_event_name(struct perf_event_attr *attr, - struct list_head *head_terms) +static char *pmu_event_name(struct list_head *head_terms) { struct parse_events__term *term; @@ -563,7 +514,7 @@ static char *pmu_event_name(struct perf_event_attr *attr, if (parse_events__is_name_term(term)) return term->val.str; - return (char *) __event_name(PERF_TYPE_RAW, attr->config); + return NULL; } int parse_events_add_pmu(struct list_head **list, int *idx, @@ -588,7 +539,7 @@ int parse_events_add_pmu(struct list_head **list, int *idx, return -EINVAL; return add_event(list, idx, &attr, - pmu_event_name(&attr, head_config)); + pmu_event_name(head_config)); } void parse_events_update_lists(struct list_head *list_event, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index d7eb070937c4..1784f06e3a6a 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -26,7 +26,6 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config); extern bool have_tracepoints(struct list_head *evlist); const char *event_type(int type); -extern const char *__event_name(int type, u64 config); extern int parse_events_option(const struct option *opt, const char *str, int unset); -- cgit v1.2.3 From 6eef3d9c2bcf52b7a3c18e609f5838c007b989a4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Jun 2012 11:53:37 -0300 Subject: perf evsel: Reconstruct raw event with modifiers from perf_event_attr I forgot to add the modifiers to raw events too, fix it. Reported-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-pi267j1aqqjti9rqh9qy4g58@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 236bdf25db6d..8f0e9dd03775 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -252,6 +252,12 @@ static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); } +static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config); + return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); +} + const char *perf_evsel__name(struct perf_evsel *evsel) { char bf[128]; @@ -261,7 +267,7 @@ const char *perf_evsel__name(struct perf_evsel *evsel) switch (evsel->attr.type) { case PERF_TYPE_RAW: - scnprintf(bf, sizeof(bf), "raw 0x%" PRIx64, evsel->attr.config); + perf_evsel__raw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_HARDWARE: -- cgit v1.2.3 From a9c34a9f9c677fcbe06bd3eda8d6caa3487b4a65 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 11 Jun 2012 15:20:03 +0200 Subject: perf tools: Remove unused evsel parameter from machine__resolve_callchain Removing unused evsel parameter from machine__resolve_callchain function. Plus related header file and callers changes. The evsel parameter is unused since following commit: perf callchain: Make callchain cursors TLS commit 472606458f3e1ced5fe3cc5f04e90a6b5a4732cf Author: Namhyung Kim Date: Thu May 31 14:43:26 2012 +0900 Signed-off-by: Jiri Olsa Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1339420814-7379-9-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 ++-- tools/perf/builtin-script.c | 4 ++-- tools/perf/builtin-top.c | 2 +- tools/perf/util/map.h | 2 +- tools/perf/util/session.c | 7 +++---- tools/perf/util/session.h | 4 ++-- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ea8ce8e1c0d8..40b0ffc3ad3b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -69,7 +69,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, evsel, al->thread, + err = machine__resolve_callchain(machine, al->thread, sample->callchain, &parent); if (err) return err; @@ -140,7 +140,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, struct hist_entry *he; if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, evsel, al->thread, + err = machine__resolve_callchain(machine, al->thread, sample->callchain, &parent); if (err) return err; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8f9f9b6140db..8fecd3b8130a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -389,7 +389,7 @@ static void print_sample_bts(union perf_event *event, printf(" "); else printf("\n"); - perf_event__print_ip(event, sample, machine, evsel, + perf_event__print_ip(event, sample, machine, PRINT_FIELD(SYM), PRINT_FIELD(DSO), PRINT_FIELD(SYMOFFSET)); } @@ -433,7 +433,7 @@ static void process_event(union perf_event *event __unused, printf(" "); else printf("\n"); - perf_event__print_ip(event, sample, machine, evsel, + perf_event__print_ip(event, sample, machine, PRINT_FIELD(SYM), PRINT_FIELD(DSO), PRINT_FIELD(SYMOFFSET)); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8090a280578c..e3cab5f088f8 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -774,7 +774,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, evsel, al.thread, + err = machine__resolve_callchain(machine, al.thread, sample->callchain, &parent); if (err) return; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 81371bad4ef0..c14c665d9a25 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -157,7 +157,7 @@ void machine__exit(struct machine *self); void machine__delete(struct machine *self); int machine__resolve_callchain(struct machine *machine, - struct perf_evsel *evsel, struct thread *thread, + struct thread *thread, struct ip_callchain *chain, struct symbol **parent); int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 582ee38ed216..febc0aeb3c66 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -289,7 +289,6 @@ struct branch_info *machine__resolve_bstack(struct machine *self, } int machine__resolve_callchain(struct machine *self, - struct perf_evsel *evsel __used, struct thread *thread, struct ip_callchain *chain, struct symbol **parent) @@ -1480,8 +1479,8 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, } void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, - struct machine *machine, struct perf_evsel *evsel, - int print_sym, int print_dso, int print_symoffset) + struct machine *machine, int print_sym, + int print_dso, int print_symoffset) { struct addr_location al; struct callchain_cursor_node *node; @@ -1495,7 +1494,7 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, if (symbol_conf.use_callchain && sample->callchain) { - if (machine__resolve_callchain(machine, evsel, al.thread, + if (machine__resolve_callchain(machine, al.thread, sample->callchain, NULL) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 7a5434c00565..877d78186f2c 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -150,8 +150,8 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, - struct machine *machine, struct perf_evsel *evsel, - int print_sym, int print_dso, int print_symoffset); + struct machine *machine, int print_sym, + int print_dso, int print_symoffset); int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap); -- cgit v1.2.3 From dd4f52232c60bcb41205a67d2df2ac0ecdfe3683 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Jun 2012 15:52:42 -0300 Subject: perf evsel: Make some methods private Now that __event_name is gone, no need to export __perf_evsel__[hs]w_name(). Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-rpjnarbt83nu9uowrfatmy12@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 6 +++--- tools/perf/util/evsel.h | 5 ----- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8f0e9dd03775..876f639d69ed 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -78,7 +78,7 @@ static const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { "ref-cycles", }; -const char *__perf_evsel__hw_name(u64 config) +static const char *__perf_evsel__hw_name(u64 config) { if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config]) return perf_evsel__hw_names[config]; @@ -140,7 +140,7 @@ static const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { "emulation-faults", }; -const char *__perf_evsel__sw_name(u64 config) +static const char *__perf_evsel__sw_name(u64 config) { if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config]) return perf_evsel__sw_names[config]; @@ -219,7 +219,7 @@ int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, perf_evsel__hw_cache_op[op][1]); } -int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) +static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) { u8 op, result, type = (config >> 0) & 0xff; const char *err = "unknown-ext-hardware-cache-type"; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c936feb4e0a6..67cc5033d192 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -95,11 +95,6 @@ const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] [PERF_EVSEL__MAX_ALIASES]; int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); -int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size); - -const char *__perf_evsel__hw_name(u64 config); -const char *__perf_evsel__sw_name(u64 config); - const char *perf_evsel__name(struct perf_evsel *evsel); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); -- cgit v1.2.3 From c0a58fb2bdf033df433cad9009c7dac4c6b872b0 Mon Sep 17 00:00:00 2001 From: Samuel Liao Date: Tue, 5 Jun 2012 13:14:59 +0800 Subject: perf annotate: Check null of sym pointer before using it Sym may be NULL, and that will cause perf to crash. Signed-off-by: Shan Wei Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/4FCD95D3.90209@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 34b1c46eaf42..67a2703e666a 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -814,7 +814,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, { struct disasm_line *pos, *n; struct annotation *notes; - const size_t size = symbol__size(sym); + size_t size; struct map_symbol ms = { .map = map, .sym = sym, @@ -834,6 +834,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, if (sym == NULL) return -1; + size = symbol__size(sym); + if (map->dso->annotate_warned) return -1; -- cgit v1.2.3 From 0718467c859f5571dc48d294596f841096f6a47a Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 18 Jun 2012 15:56:33 -0400 Subject: x86/nmi: Clean up register_nmi_handler() usage Implement a cleaner and easier to maintain version for the section warning fixes implemented in commit eeaaa96a3a21 ("x86/nmi: Fix section mismatch warnings on 32-bit"). Signed-off-by: Li Zhong Signed-off-by: Don Zickus Cc: Jan Beulich Link: http://lkml.kernel.org/r/1340049393-17771-1-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 20 +++----------------- arch/x86/kernel/nmi_selftest.c | 7 ++++--- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index dc580c42851c..c0fa356e90de 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -44,28 +44,14 @@ struct nmiaction { const char *name; }; -#define register_nmi_handler(t, fn, fg, n) \ +#define register_nmi_handler(t, fn, fg, n, init...) \ ({ \ - static struct nmiaction fn##_na = { \ + static struct nmiaction init fn##_na = { \ .handler = (fn), \ .name = (n), \ .flags = (fg), \ }; \ - __register_nmi_handler((t), &fn##_na); \ -}) - -/* - * For special handlers that register/unregister in the - * init section only. This should be considered rare. - */ -#define register_nmi_handler_initonly(t, fn, fg, n) \ -({ \ - static struct nmiaction fn##_na __initdata = { \ - .handler = (fn), \ - .name = (n), \ - .flags = (fg), \ - }; \ - __register_nmi_handler((t), &fn##_na); \ + __register_nmi_handler((t), &fn##_na); \ }) int __register_nmi_handler(unsigned int, struct nmiaction *); diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index 149b8d9c6ad4..6d9582ec0324 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -42,7 +42,8 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs) static void __init init_nmi_testsuite(void) { /* trap all the unknown NMIs we may generate */ - register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); + register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk", + __initdata); } static void __init cleanup_nmi_testsuite(void) @@ -64,8 +65,8 @@ static void __init test_nmi_ipi(struct cpumask *mask) { unsigned long timeout; - if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback, - NMI_FLAG_FIRST, "nmi_selftest")) { + if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, + NMI_FLAG_FIRST, "nmi_selftest", __initdata)) { nmi_fail = FAILURE; return; } -- cgit v1.2.3 From e1b6fc55da40bc17e20795901cb786e3619f9be9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 18 Jun 2012 11:28:45 +0100 Subject: x86/microcode: Mark microcode_id[] as __initconst It's not being used for other than creating module aliases (i.e. no loadable section has any reference to it). Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/4FDF1EFD020000780008A65D@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index fbdfc6917180..c383b3f8f397 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -504,7 +504,7 @@ static struct notifier_block __refdata mc_cpu_notifier = { #ifdef MODULE /* Autoload on Intel and AMD systems */ -static const struct x86_cpu_id microcode_id[] = { +static const struct x86_cpu_id __initconst microcode_id[] = { #ifdef CONFIG_MICROCODE_INTEL { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, #endif -- cgit v1.2.3 From 0d26d1d873a302828e064737746c53a2689e6c0f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 18 Jun 2012 11:30:20 +0100 Subject: x86/mm: Mark free_initrd_mem() as __init ... matching various other architectures. Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/4FDF1F5C020000780008A661@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index bc4e9d84157f..e0e6990723e9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -385,7 +385,7 @@ void free_initmem(void) } #ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) +void __init free_initrd_mem(unsigned long start, unsigned long end) { /* * end could be not aligned, and We can not align that, -- cgit v1.2.3 From 0fa0e2f02e8edfbdb5f86d1cab0fa6dc0517489f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 18 Jun 2012 11:40:04 +0100 Subject: x86: Move call to print_modules() out of show_regs() Printing the list of loaded modules is really unrelated to what this function is about, and is particularly unnecessary in the context of the SysRQ key handling (gets printed so far over and over). It should really be the caller of the function to decide whether this piece of information is useful (and to avoid redundantly printing it). Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/4FDF21A4020000780008A67F@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/dumpstack_32.c | 1 - arch/x86/kernel/dumpstack_64.c | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 87d3b5d663cd..ae42418bc50f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -271,6 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) return 1; + print_modules(); show_regs(regs); #ifdef CONFIG_X86_32 if (user_mode_vm(regs)) { diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 3a8aced11ae9..1038a417ea53 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -86,7 +86,6 @@ void show_regs(struct pt_regs *regs) { int i; - print_modules(); __show_regs(regs, !user_mode_vm(regs)); pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index c582e9c5bd1a..b653675d5288 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -254,7 +254,6 @@ void show_regs(struct pt_regs *regs) sp = regs->sp; printk("CPU %d ", cpu); - print_modules(); __show_regs(regs, 1); printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n", cur->comm, cur->pid, task_thread_info(cur), cur); -- cgit v1.2.3 From 2b1b712f050eaf0ac576591281446dc960c0afc5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 20 Jun 2012 21:18:14 -0700 Subject: x86, reboot: Drop redundant write of reboot_mode We write reboot_mode to BIOS location 0x472 in native_machine_emergency_restart() (reboot.c:542) already, there is no need to then write it again in machine_real_restart(). This means nothing gets written there for MRR_APM, but the APM call is a poweroff call and doesn't use this memory location. Link: http://lkml.kernel.org/n/tip-3i0pfh44c1e3jv5lab0cf7sc@git.kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/reboot.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 6ddb9cd0ced0..6ef559f09acd 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -179,14 +179,6 @@ void __noreturn machine_real_restart(unsigned int type) write_cr3(real_mode_header->trampoline_pgd); #endif - /* - * Write 0x1234 to absolute memory location 0x472. The BIOS reads - * this on booting to tell it to "Bypass memory test (also warm - * boot)". This seems like a fairly standard thing that gets set by - * REBOOT.COM programs, and the previous reset routine did this - * too. */ - *((unsigned short *)0x472) = reboot_mode; - /* Jump to the identity-mapped low memory code */ #ifdef CONFIG_X86_32 asm volatile("jmpl *%0" : : -- cgit v1.2.3 From 9751d7627582fc1cc64625d63bde9528c14f1544 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 Jun 2012 10:25:03 -0700 Subject: x86-64, reboot: Be more paranoid in 64-bit reboot=bios Be a bit more paranoid in the transition back to 16-bit mode. In particular, in case the kernel is residing above the 4 GiB mark, switch to the trampoline GDT, and make the jump after turning off paging a far jump. In theory, none of this should matter, but it is exactly the kind of things that broken SMM or virtualization software could trip up on. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/tip-jopx7y6g6dbcx4tpal8q0jlr@git.kernel.org --- arch/x86/realmode/rm/reboot.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S index 6bf8feac5557..f932ea61d1c8 100644 --- a/arch/x86/realmode/rm/reboot.S +++ b/arch/x86/realmode/rm/reboot.S @@ -22,14 +22,18 @@ ENTRY(machine_real_restart_asm) #ifdef CONFIG_X86_64 + /* Switch to trampoline GDT as it is guaranteed < 4 GiB */ + movl $__KERNEL_DS, %eax + movl %eax, %ds + lgdtl pa_tr_gdt /* Disable paging to drop us out of long mode */ movl %cr0, %eax andl $~X86_CR0_PG, %eax movl %eax, %cr0 - jmp 1f /* "A branch" may be needed here, assume near is OK */ + ljmpl $__KERNEL32_CS, $pa_machine_real_restart_paging_off -1: +GLOBAL(machine_real_restart_paging_off) xorl %eax, %eax xorl %edx, %edx movl $MSR_EFER, %ecx -- cgit v1.2.3 From f8bbfd7d28303967ca4e8597de9bdc9bf8b197e7 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 23 Feb 2012 17:07:06 +0100 Subject: oprofile, perf: Use per-cpu framework This changes oprofile_perf.c to use the per-cpu framework. Using the per-cpu framework should avoid error like the following: arch/arm/oprofile/../../../drivers/oprofile/oprofile_perf.c:28:28: error: variably modified 'perf_events' at file scope Reported-by: William Cohen Cc: Will Deacon Signed-off-by: Robert Richter --- drivers/oprofile/oprofile_perf.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c index efc4b7f308cf..f3cfa0b9adfa 100644 --- a/drivers/oprofile/oprofile_perf.c +++ b/drivers/oprofile/oprofile_perf.c @@ -1,5 +1,6 @@ /* * Copyright 2010 ARM Ltd. + * Copyright 2012 Advanced Micro Devices, Inc., Robert Richter * * Perf-events backend for OProfile. */ @@ -25,7 +26,7 @@ static int oprofile_perf_enabled; static DEFINE_MUTEX(oprofile_perf_mutex); static struct op_counter_config *counter_config; -static struct perf_event **perf_events[NR_CPUS]; +static DEFINE_PER_CPU(struct perf_event **, perf_events); static int num_counters; /* @@ -38,7 +39,7 @@ static void op_overflow_handler(struct perf_event *event, u32 cpu = smp_processor_id(); for (id = 0; id < num_counters; ++id) - if (perf_events[cpu][id] == event) + if (per_cpu(perf_events, cpu)[id] == event) break; if (id != num_counters) @@ -74,7 +75,7 @@ static int op_create_counter(int cpu, int event) { struct perf_event *pevent; - if (!counter_config[event].enabled || perf_events[cpu][event]) + if (!counter_config[event].enabled || per_cpu(perf_events, cpu)[event]) return 0; pevent = perf_event_create_kernel_counter(&counter_config[event].attr, @@ -91,18 +92,18 @@ static int op_create_counter(int cpu, int event) return -EBUSY; } - perf_events[cpu][event] = pevent; + per_cpu(perf_events, cpu)[event] = pevent; return 0; } static void op_destroy_counter(int cpu, int event) { - struct perf_event *pevent = perf_events[cpu][event]; + struct perf_event *pevent = per_cpu(perf_events, cpu)[event]; if (pevent) { perf_event_release_kernel(pevent); - perf_events[cpu][event] = NULL; + per_cpu(perf_events, cpu)[event] = NULL; } } @@ -257,12 +258,12 @@ void oprofile_perf_exit(void) for_each_possible_cpu(cpu) { for (id = 0; id < num_counters; ++id) { - event = perf_events[cpu][id]; + event = per_cpu(perf_events, cpu)[id]; if (event) perf_event_release_kernel(event); } - kfree(perf_events[cpu]); + kfree(per_cpu(perf_events, cpu)); } kfree(counter_config); @@ -277,8 +278,6 @@ int __init oprofile_perf_init(struct oprofile_operations *ops) if (ret) return ret; - memset(&perf_events, 0, sizeof(perf_events)); - num_counters = perf_num_counters(); if (num_counters <= 0) { pr_info("oprofile: no performance counters\n"); @@ -298,9 +297,9 @@ int __init oprofile_perf_init(struct oprofile_operations *ops) } for_each_possible_cpu(cpu) { - perf_events[cpu] = kcalloc(num_counters, + per_cpu(perf_events, cpu) = kcalloc(num_counters, sizeof(struct perf_event *), GFP_KERNEL); - if (!perf_events[cpu]) { + if (!per_cpu(perf_events, cpu)) { pr_info("oprofile: failed to allocate %d perf events " "for cpu %d\n", num_counters, cpu); ret = -ENOMEM; -- cgit v1.2.3 From d9b0cde91c60da0ed5f92cdc3ac878142e6b5f27 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 29 May 2012 14:31:23 -0700 Subject: x86-64, gcc: Use -mpreferred-stack-boundary=3 if supported On x86-64, the standard ABI requires alignment to 16 bytes. However, this is not actually necessary in the kernel (we don't do SSE except in very controlled ways); and furthermore, the standard kernel entry on x86-64 actually leaves the stack on an odd 8-byte boundary, which means that gcc will generate extra instructions to keep the stack *mis*aligned! gcc 4.8 adds an -mpreferred-stack-boundary=3 option to override this and lets us save some stack space and a handful of instructions. Note that this causes us to pass -mno-sse twice; this is redundant, but necessary since the cc-option test will fail unless -mno-sse is passed on the same command line. [ hpa: rewrote the patch description ] Signed-off-by: H.J. Lu Link: http://lkml.kernel.org/r/CAMe9rOqPfy3JcZRLaUeCjBe9BVY-P6e0uaSbMi5hvS-6WwQueg@mail.gmail.com Signed-off-by: H. Peter Anvin --- arch/x86/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1f2521434554..b0c5276861ec 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -49,6 +49,9 @@ else KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 + # Use -mpreferred-stack-boundary=3 if supported. + KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3) + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) -- cgit v1.2.3 From 357398e96d8c883b010379a7669df43ed0e2e32b Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 20 Jun 2012 18:39:27 +0200 Subject: perf/x86: Fix section mismatch in uncore_pci_init() Fix section mismatch in uncore_pci_init(): WARNING: vmlinux.o(.init.text+0x9246): Section mismatch in reference from the function uncore_pci_init() to the function .devexit.text:uncore_pci_remove() The function __init uncore_pci_init() references a function __devexit uncore_pci_remove(). [...] Signed-off-by: Robert Richter Cc: Cc: Link: http://lkml.kernel.org/r/20120620163927.GI5046@erda.amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 28a8413ca199..6f43f9584e3d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -1313,7 +1313,7 @@ static int __devinit uncore_pci_add(struct intel_uncore_type *type, return 0; } -static void __devexit uncore_pci_remove(struct pci_dev *pdev) +static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box = pci_get_drvdata(pdev); struct intel_uncore_pmu *pmu = box->pmu; -- cgit v1.2.3 From 11cab711f686893f2696a061dfca30454a624784 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Fri, 22 Jun 2012 08:12:12 -0500 Subject: x86/uv: Fix the UV BAU destination timeout period Correct the calculation of a destination timeout period, which is used to distinguish between a destination timeout and the situation where all the target software ack resources are full and a request is returned immediately. The problem is that integer arithmetic was overflowing, yielding a very large result. Without this fix destination timeouts are identified as resource 'plugged' events and an ipi method of resource releasing is unnecessarily employed. Signed-off-by: Cliff Wickman Link: http://lkml.kernel.org/r/20120622131212.GA31884@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/tlb_uv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 59880afa851f..0c48d438cbba 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1811,8 +1811,8 @@ static int calculate_destination_timeout(void) index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; - base = timeout_base_ns[index]; - ts_ns = base * mult1 * mult2; + ts_ns = timeout_base_ns[index]; + ts_ns *= (mult1 * mult2); ret = ts_ns / 1000; } else { /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */ -- cgit v1.2.3 From 26ef85770c765bb8b6b6922f8a413872dd8e3979 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Fri, 22 Jun 2012 08:13:30 -0500 Subject: x86/uv: Implement UV BAU runtime enable and disable control via /proc/sgi_uv/ This patch enables the BAU to be turned on or off dynamically. echo "on" > /proc/sgi_uv/ptc_statistics echo "off" > /proc/sgi_uv/ptc_statistics The system may be booted with or without the nobau option. Whether the system currently has the BAU off can be seen in the /proc file -- normally with the baustats script. Each cpu will have a 1 in the bauoff field if the BAU was turned off, so baustats will give a count of cpus that have it off. Signed-off-by: Cliff Wickman Link: http://lkml.kernel.org/r/20120622131330.GB31884@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 2 ++ arch/x86/platform/uv/tlb_uv.c | 76 ++++++++++++++++++++++++++++++++-------- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 6149b476d9df..847c00b721b2 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -520,6 +520,7 @@ struct ptc_stats { unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */ unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */ unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */ + unsigned long s_enters; /* entries to the driver */ /* destination statistics */ unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ @@ -586,6 +587,7 @@ struct bau_control { int timeout_tries; int ipi_attempts; int conseccompletes; + short nobau; int baudisabled; int set_bau_off; short cpu; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 0c48d438cbba..1492170cbb5a 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -38,6 +38,7 @@ static int timeout_base_ns[] = { static int timeout_us; static int nobau; +static int nobau_perm; static int baudisabled; static spinlock_t disable_lock; static cycles_t congested_cycles; @@ -120,6 +121,40 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats); static DEFINE_PER_CPU(struct bau_control, bau_control); static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); +static void +set_bau_on(void) +{ + int cpu; + struct bau_control *bcp; + + if (nobau_perm) { + pr_info("BAU not initialized; cannot be turned on\n"); + return; + } + nobau = 0; + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + bcp->nobau = 0; + } + pr_info("BAU turned on\n"); + return; +} + +static void +set_bau_off(void) +{ + int cpu; + struct bau_control *bcp; + + nobau = 1; + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + bcp->nobau = 1; + } + pr_info("BAU turned off\n"); + return; +} + /* * Determine the first node on a uvhub. 'Nodes' are used for kernel * memory allocation. @@ -1079,12 +1114,12 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct ptc_stats *stat; struct bau_control *bcp; - /* kernel was booted 'nobau' */ - if (nobau) - return cpumask; - bcp = &per_cpu(bau_control, cpu); stat = bcp->statp; + stat->s_enters++; + + if (bcp->nobau) + return cpumask; /* bau was disabled due to slow response */ if (bcp->baudisabled) { @@ -1338,29 +1373,32 @@ static inline unsigned long long usec_2_cycles(unsigned long microsec) static int ptc_seq_show(struct seq_file *file, void *data) { struct ptc_stats *stat; + struct bau_control *bcp; int cpu; cpu = *(loff_t *)data; if (!cpu) { seq_printf(file, - "# cpu sent stime self locals remotes ncpus localhub "); + "# cpu bauoff sent stime self locals remotes ncpus localhub "); seq_printf(file, "remotehub numuvhubs numuvhubs16 numuvhubs8 "); seq_printf(file, "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok "); seq_printf(file, - "resetp resett giveup sto bz throt swack recv rtime "); + "resetp resett giveup sto bz throt enters swack recv rtime "); seq_printf(file, "all one mult none retry canc nocan reset rcan "); seq_printf(file, "disable enable wars warshw warwaits\n"); } if (cpu < num_possible_cpus() && cpu_online(cpu)) { - stat = &per_cpu(ptcstats, cpu); + bcp = &per_cpu(bau_control, cpu); + stat = bcp->statp; /* source side statistics */ seq_printf(file, - "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", - cpu, stat->s_requestor, cycles_2_us(stat->s_time), + "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + cpu, bcp->nobau, stat->s_requestor, + cycles_2_us(stat->s_time), stat->s_ntargself, stat->s_ntarglocals, stat->s_ntargremotes, stat->s_ntargcpu, stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, @@ -1369,11 +1407,11 @@ static int ptc_seq_show(struct seq_file *file, void *data) stat->s_ntarguvhub8, stat->s_ntarguvhub4, stat->s_ntarguvhub2, stat->s_ntarguvhub1, stat->s_dtimeout, stat->s_strongnacks); - seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", + seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld ", stat->s_retry_messages, stat->s_retriesok, stat->s_resets_plug, stat->s_resets_timeout, stat->s_giveup, stat->s_stimeout, - stat->s_busy, stat->s_throttles); + stat->s_busy, stat->s_throttles, stat->s_enters); /* destination side statistics */ seq_printf(file, @@ -1438,6 +1476,14 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user, return -EFAULT; optstr[count - 1] = '\0'; + if (!strcmp(optstr, "on")) { + set_bau_on(); + return count; + } else if (!strcmp(optstr, "off")) { + set_bau_off(); + return count; + } + if (strict_strtol(optstr, 10, &input_arg) < 0) { printk(KERN_DEBUG "%s is invalid\n", optstr); return -EINVAL; @@ -1836,6 +1882,8 @@ static void __init init_per_cpu_tunables(void) for_each_present_cpu(cpu) { bcp = &per_cpu(bau_control, cpu); bcp->baudisabled = 0; + if (nobau) + bcp->nobau = 1; bcp->statp = &per_cpu(ptcstats, cpu); /* time interval to catch a hardware stay-busy bug */ bcp->timeout_interval = usec_2_cycles(2*timeout_us); @@ -2069,9 +2117,6 @@ static int __init uv_bau_init(void) if (!is_uv_system()) return 0; - if (nobau) - return 0; - for_each_possible_cpu(cur_cpu) { mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); @@ -2091,7 +2136,8 @@ static int __init uv_bau_init(void) enable_timeouts(); if (init_per_cpu(nuvhubs, uv_base_pnode)) { - nobau = 1; + set_bau_off(); + nobau_perm = 1; return 0; } -- cgit v1.2.3 From 8b6e511e51f7e540c8e71022318ee4cc9a4567a7 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Fri, 22 Jun 2012 08:14:59 -0500 Subject: x86/uv: Work around UV2 BAU hangs On SGI's UV2 the BAU (Broadcast Assist Unit) driver can hang under a heavy load. To cure this: - Disable the UV2 extended status mode (see UV2_EXT_SHFT), as this mode changes BAU behavior in more ways then just delivering an extra bit of status. Revert status to just two meaningful bits, like UV1. - Use no IPI-style resets on UV2. Just give up the request for whatever the reason it failed and let it be accomplished with the legacy IPI method. - Use no alternate sending descriptor (the former UV2 workaround bcp->using_desc and handle_uv2_busy() stuff). Just disable the use of the BAU for a period of time in favor of the legacy IPI method when the h/w bug leaves a descriptor busy. -- new tunable: giveup_limit determines the threshold at which a hub is so plugged that it should do all requests with the legacy IPI method for a period of time -- generalize disable_for_congestion() (renamed disable_for_period()) for use whenever a hub should avoid using the BAU for a period of time Also: - Fix find_another_by_swack(), which is part of the UV2 bug workaround - Correct and clarify the statistics (new stats s_overipilimit, s_giveuplimit, s_enters, s_ipifordisabled, s_plugged, s_congested) Signed-off-by: Cliff Wickman Link: http://lkml.kernel.org/r/20120622131459.GC31884@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 28 ++- arch/x86/platform/uv/tlb_uv.c | 387 ++++++++++++++++++--------------------- 2 files changed, 200 insertions(+), 215 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 847c00b721b2..a06983cdc125 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -140,6 +140,9 @@ #define IPI_RESET_LIMIT 1 /* after this # consecutive successes, bump up the throttle if it was lowered */ #define COMPLETE_THRESHOLD 5 +/* after this # of giveups (fall back to kernel IPI's) disable the use of + the BAU for a period of time */ +#define GIVEUP_LIMIT 100 #define UV_LB_SUBNODEID 0x10 @@ -166,7 +169,6 @@ #define FLUSH_RETRY_TIMEOUT 2 #define FLUSH_GIVEUP 3 #define FLUSH_COMPLETE 4 -#define FLUSH_RETRY_BUSYBUG 5 /* * tuning the action when the numalink network is extremely delayed @@ -175,7 +177,7 @@ microseconds */ #define CONGESTED_REPS 10 /* long delays averaged over this many broadcasts */ -#define CONGESTED_PERIOD 30 /* time for the bau to be +#define DISABLED_PERIOD 10 /* time for the bau to be disabled, in seconds */ /* see msg_type: */ #define MSG_NOOP 0 @@ -520,7 +522,12 @@ struct ptc_stats { unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */ unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */ unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */ - unsigned long s_enters; /* entries to the driver */ + unsigned long s_overipilimit; /* over the ipi reset limit */ + unsigned long s_giveuplimit; /* disables, over giveup limit*/ + unsigned long s_enters; /* entries to the driver */ + unsigned long s_ipifordisabled; /* fall back to IPI; disabled */ + unsigned long s_plugged; /* plugged by h/w bug*/ + unsigned long s_congested; /* giveup on long wait */ /* destination statistics */ unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ @@ -588,8 +595,7 @@ struct bau_control { int ipi_attempts; int conseccompletes; short nobau; - int baudisabled; - int set_bau_off; + short baudisabled; short cpu; short osnode; short uvhub_cpu; @@ -598,14 +604,16 @@ struct bau_control { short cpus_in_socket; short cpus_in_uvhub; short partition_base_pnode; - short using_desc; /* an index, like uvhub_cpu */ - unsigned int inuse_map; + short busy; /* all were busy (war) */ unsigned short message_number; unsigned short uvhub_quiesce; short socket_acknowledge_count[DEST_Q_SIZE]; cycles_t send_message; + cycles_t period_end; + cycles_t period_time; spinlock_t uvhub_lock; spinlock_t queue_lock; + spinlock_t disable_lock; /* tunables */ int max_concurr; int max_concurr_const; @@ -616,9 +624,9 @@ struct bau_control { int complete_threshold; int cong_response_us; int cong_reps; - int cong_period; - unsigned long clocks_per_100_usec; - cycles_t period_time; + cycles_t disabled_period; + int period_giveups; + int giveup_limit; long period_requests; struct hub_and_pnode *thp; }; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 1492170cbb5a..71b5d5a07d7b 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1,7 +1,7 @@ /* * SGI UltraViolet TLB flush routines. * - * (c) 2008-2011 Cliff Wickman , SGI. + * (c) 2008-2012 Cliff Wickman , SGI. * * This code is released under the GNU General Public License version 2 or * later. @@ -39,8 +39,6 @@ static int timeout_base_ns[] = { static int timeout_us; static int nobau; static int nobau_perm; -static int baudisabled; -static spinlock_t disable_lock; static cycles_t congested_cycles; /* tunables: */ @@ -48,12 +46,13 @@ static int max_concurr = MAX_BAU_CONCURRENT; static int max_concurr_const = MAX_BAU_CONCURRENT; static int plugged_delay = PLUGGED_DELAY; static int plugsb4reset = PLUGSB4RESET; +static int giveup_limit = GIVEUP_LIMIT; static int timeoutsb4reset = TIMEOUTSB4RESET; static int ipi_reset_limit = IPI_RESET_LIMIT; static int complete_threshold = COMPLETE_THRESHOLD; static int congested_respns_us = CONGESTED_RESPONSE_US; static int congested_reps = CONGESTED_REPS; -static int congested_period = CONGESTED_PERIOD; +static int disabled_period = DISABLED_PERIOD; static struct tunables tunables[] = { {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ @@ -64,7 +63,8 @@ static struct tunables tunables[] = { {&complete_threshold, COMPLETE_THRESHOLD}, {&congested_respns_us, CONGESTED_RESPONSE_US}, {&congested_reps, CONGESTED_REPS}, - {&congested_period, CONGESTED_PERIOD} + {&disabled_period, DISABLED_PERIOD}, + {&giveup_limit, GIVEUP_LIMIT} }; static struct dentry *tunables_dir; @@ -313,7 +313,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, * Both sockets dump their completed count total into * the message's count. */ - smaster->socket_acknowledge_count[mdp->msg_slot] = 0; + *sp = 0; asp = (struct atomic_short *)&msg->acknowledge_count; msg_ack_count = atom_asr(socket_ack_count, asp); @@ -526,16 +526,15 @@ static int uv1_wait_completion(struct bau_desc *bau_desc, } /* - * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. + * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register. + * But not currently used. */ static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc) { unsigned long descriptor_status; - unsigned long descriptor_status2; - descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); - descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL; - descriptor_status = (descriptor_status << 1) | descriptor_status2; + descriptor_status = + ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1; return descriptor_status; } @@ -566,87 +565,11 @@ int normal_busy(struct bau_control *bcp) */ int handle_uv2_busy(struct bau_control *bcp) { - int busy_one = bcp->using_desc; - int normal = bcp->uvhub_cpu; - int selected = -1; - int i; - unsigned long descriptor_status; - unsigned long status; - int mmr_offset; - struct bau_desc *bau_desc_old; - struct bau_desc *bau_desc_new; - struct bau_control *hmaster = bcp->uvhub_master; struct ptc_stats *stat = bcp->statp; - cycles_t ttm; stat->s_uv2_wars++; - spin_lock(&hmaster->uvhub_lock); - /* try for the original first */ - if (busy_one != normal) { - if (!normal_busy(bcp)) - selected = normal; - } - if (selected < 0) { - /* can't use the normal, select an alternate */ - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; - descriptor_status = read_lmmr(mmr_offset); - - /* scan available descriptors 32-63 */ - for (i = 0; i < UV_CPUS_PER_AS; i++) { - if ((hmaster->inuse_map & (1 << i)) == 0) { - status = ((descriptor_status >> - (i * UV_ACT_STATUS_SIZE)) & - UV_ACT_STATUS_MASK) << 1; - if (status != UV2H_DESC_BUSY) { - selected = i + UV_CPUS_PER_AS; - break; - } - } - } - } - - if (busy_one != normal) - /* mark the busy alternate as not in-use */ - hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS)); - - if (selected >= 0) { - /* switch to the selected descriptor */ - if (selected != normal) { - /* set the selected alternate as in-use */ - hmaster->inuse_map |= - (1 << (selected - UV_CPUS_PER_AS)); - if (selected > stat->s_uv2_wars_hw) - stat->s_uv2_wars_hw = selected; - } - bau_desc_old = bcp->descriptor_base; - bau_desc_old += (ITEMS_PER_DESC * busy_one); - bcp->using_desc = selected; - bau_desc_new = bcp->descriptor_base; - bau_desc_new += (ITEMS_PER_DESC * selected); - *bau_desc_new = *bau_desc_old; - } else { - /* - * All are busy. Wait for the normal one for this cpu to - * free up. - */ - stat->s_uv2_war_waits++; - spin_unlock(&hmaster->uvhub_lock); - ttm = get_cycles(); - do { - cpu_relax(); - } while (normal_busy(bcp)); - spin_lock(&hmaster->uvhub_lock); - /* switch to the original descriptor */ - bcp->using_desc = normal; - bau_desc_old = bcp->descriptor_base; - bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc); - bcp->using_desc = (ITEMS_PER_DESC * normal); - bau_desc_new = bcp->descriptor_base; - bau_desc_new += (ITEMS_PER_DESC * normal); - *bau_desc_new = *bau_desc_old; /* copy the entire descriptor */ - } - spin_unlock(&hmaster->uvhub_lock); - return FLUSH_RETRY_BUSYBUG; + bcp->busy = 1; + return FLUSH_GIVEUP; } static int uv2_wait_completion(struct bau_desc *bau_desc, @@ -655,7 +578,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, { unsigned long descriptor_stat; cycles_t ttm; - int desc = bcp->using_desc; + int desc = bcp->uvhub_cpu; long busy_reps = 0; struct ptc_stats *stat = bcp->statp; @@ -663,24 +586,38 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, /* spin on the status MMR, waiting for it to go idle */ while (descriptor_stat != UV2H_DESC_IDLE) { - /* - * Our software ack messages may be blocked because - * there are no swack resources available. As long - * as none of them has timed out hardware will NACK - * our message and its state will stay IDLE. - */ - if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || - (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { + if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) { + /* + * A h/w bug on the destination side may + * have prevented the message being marked + * pending, thus it doesn't get replied to + * and gets continually nacked until it times + * out with a SOURCE_TIMEOUT. + */ stat->s_stimeout++; return FLUSH_GIVEUP; - } else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) { - stat->s_strongnacks++; - bcp->conseccompletes = 0; - return FLUSH_GIVEUP; } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { + ttm = get_cycles(); + + /* + * Our retries may be blocked by all destination + * swack resources being consumed, and a timeout + * pending. In that case hardware returns the + * ERROR that looks like a destination timeout. + * Without using the extended status we have to + * deduce from the short time that this was a + * strong nack. + */ + if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { + bcp->conseccompletes = 0; + stat->s_plugged++; + /* FLUSH_RETRY_PLUGGED causes hang on boot */ + return FLUSH_GIVEUP; + } stat->s_dtimeout++; bcp->conseccompletes = 0; - return FLUSH_RETRY_TIMEOUT; + /* FLUSH_RETRY_TIMEOUT causes hang on boot */ + return FLUSH_GIVEUP; } else { busy_reps++; if (busy_reps > 1000000) { @@ -688,9 +625,8 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, busy_reps = 0; ttm = get_cycles(); if ((ttm - bcp->send_message) > - (bcp->clocks_per_100_usec)) { + bcp->timeout_interval) return handle_uv2_busy(bcp); - } } /* * descriptor_stat is still BUSY @@ -714,7 +650,7 @@ static int wait_completion(struct bau_desc *bau_desc, { int right_shift; unsigned long mmr_offset; - int desc = bcp->using_desc; + int desc = bcp->uvhub_cpu; if (desc < UV_CPUS_PER_AS) { mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; @@ -793,33 +729,31 @@ static void destination_timeout(struct bau_desc *bau_desc, } /* - * Completions are taking a very long time due to a congested numalink - * network. + * Stop all cpus on a uvhub from using the BAU for a period of time. + * This is reversed by check_enable. */ -static void disable_for_congestion(struct bau_control *bcp, - struct ptc_stats *stat) +static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) { - /* let only one cpu do this disabling */ - spin_lock(&disable_lock); - - if (!baudisabled && bcp->period_requests && - ((bcp->period_time / bcp->period_requests) > congested_cycles)) { - int tcpu; - struct bau_control *tbcp; - /* it becomes this cpu's job to turn on the use of the - BAU again */ - baudisabled = 1; - bcp->set_bau_off = 1; - bcp->set_bau_on_time = get_cycles(); - bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period); + int tcpu; + struct bau_control *tbcp; + struct bau_control *hmaster; + cycles_t tm1; + + hmaster = bcp->uvhub_master; + spin_lock(&hmaster->disable_lock); + if (!bcp->baudisabled) { stat->s_bau_disabled++; + tm1 = get_cycles(); for_each_present_cpu(tcpu) { tbcp = &per_cpu(bau_control, tcpu); - tbcp->baudisabled = 1; + if (tbcp->uvhub_master == hmaster) { + tbcp->baudisabled = 1; + tbcp->set_bau_on_time = + tm1 + bcp->disabled_period; + } } } - - spin_unlock(&disable_lock); + spin_unlock(&hmaster->disable_lock); } static void count_max_concurr(int stat, struct bau_control *bcp, @@ -850,16 +784,30 @@ static void record_send_stats(cycles_t time1, cycles_t time2, bcp->period_requests++; bcp->period_time += elapsed; if ((elapsed > congested_cycles) && - (bcp->period_requests > bcp->cong_reps)) - disable_for_congestion(bcp, stat); + (bcp->period_requests > bcp->cong_reps) && + ((bcp->period_time / bcp->period_requests) > + congested_cycles)) { + stat->s_congested++; + disable_for_period(bcp, stat); + } } } else stat->s_requestor--; if (completion_status == FLUSH_COMPLETE && try > 1) stat->s_retriesok++; - else if (completion_status == FLUSH_GIVEUP) + else if (completion_status == FLUSH_GIVEUP) { stat->s_giveup++; + if (get_cycles() > bcp->period_end) + bcp->period_giveups = 0; + bcp->period_giveups++; + if (bcp->period_giveups == 1) + bcp->period_end = get_cycles() + bcp->disabled_period; + if (bcp->period_giveups > bcp->giveup_limit) { + disable_for_period(bcp, stat); + stat->s_giveuplimit++; + } + } } /* @@ -903,7 +851,8 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc, * Returns 1 if it gives up entirely and the original cpu mask is to be * returned to the kernel. */ -int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) +int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp, + struct bau_desc *bau_desc) { int seq_number = 0; int completion_stat = 0; @@ -916,24 +865,23 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) struct bau_control *hmaster = bcp->uvhub_master; struct uv1_bau_msg_header *uv1_hdr = NULL; struct uv2_bau_msg_header *uv2_hdr = NULL; - struct bau_desc *bau_desc; - if (bcp->uvhub_version == 1) + if (bcp->uvhub_version == 1) { + uv1 = 1; uv1_throttle(hmaster, stat); + } while (hmaster->uvhub_quiesce) cpu_relax(); time1 = get_cycles(); + if (uv1) + uv1_hdr = &bau_desc->header.uv1_hdr; + else + uv2_hdr = &bau_desc->header.uv2_hdr; + do { - bau_desc = bcp->descriptor_base; - bau_desc += (ITEMS_PER_DESC * bcp->using_desc); - if (bcp->uvhub_version == 1) { - uv1 = 1; - uv1_hdr = &bau_desc->header.uv1_hdr; - } else - uv2_hdr = &bau_desc->header.uv2_hdr; - if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) { + if (try == 0) { if (uv1) uv1_hdr->msg_type = MSG_REGULAR; else @@ -951,25 +899,24 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) uv1_hdr->sequence = seq_number; else uv2_hdr->sequence = seq_number; - index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc; + index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu; bcp->send_message = get_cycles(); write_mmr_activation(index); try++; completion_stat = wait_completion(bau_desc, bcp, try); - /* UV2: wait_completion() may change the bcp->using_desc */ handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { bcp->ipi_attempts = 0; + stat->s_overipilimit++; completion_stat = FLUSH_GIVEUP; break; } cpu_relax(); } while ((completion_stat == FLUSH_RETRY_PLUGGED) || - (completion_stat == FLUSH_RETRY_BUSYBUG) || (completion_stat == FLUSH_RETRY_TIMEOUT)); time2 = get_cycles(); @@ -990,28 +937,33 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) } /* - * The BAU is disabled. When the disabled time period has expired, the cpu - * that disabled it must re-enable it. - * Return 0 if it is re-enabled for all cpus. + * The BAU is disabled for this uvhub. When the disabled time period has + * expired re-enable it. + * Return 0 if it is re-enabled for all cpus on this uvhub. */ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) { int tcpu; struct bau_control *tbcp; + struct bau_control *hmaster; - if (bcp->set_bau_off) { - if (get_cycles() >= bcp->set_bau_on_time) { - stat->s_bau_reenabled++; - baudisabled = 0; - for_each_present_cpu(tcpu) { - tbcp = &per_cpu(bau_control, tcpu); + hmaster = bcp->uvhub_master; + spin_lock(&hmaster->disable_lock); + if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { + stat->s_bau_reenabled++; + for_each_present_cpu(tcpu) { + tbcp = &per_cpu(bau_control, tcpu); + if (tbcp->uvhub_master == hmaster) { tbcp->baudisabled = 0; tbcp->period_requests = 0; tbcp->period_time = 0; + tbcp->period_giveups = 0; } - return 0; } + spin_unlock(&hmaster->disable_lock); + return 0; } + spin_unlock(&hmaster->disable_lock); return -1; } @@ -1113,6 +1065,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct cpumask *flush_mask; struct ptc_stats *stat; struct bau_control *bcp; + unsigned long descriptor_status; + unsigned long status; bcp = &per_cpu(bau_control, cpu); stat = bcp->statp; @@ -1121,10 +1075,22 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, if (bcp->nobau) return cpumask; + if (bcp->busy) { + descriptor_status = + read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0); + status = ((descriptor_status >> (bcp->uvhub_cpu * + UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1; + if (status == UV2H_DESC_BUSY) + return cpumask; + bcp->busy = 0; + } + /* bau was disabled due to slow response */ if (bcp->baudisabled) { - if (check_enable(bcp, stat)) + if (check_enable(bcp, stat)) { + stat->s_ipifordisabled++; return cpumask; + } } /* @@ -1140,7 +1106,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, stat->s_ntargself++; bau_desc = bcp->descriptor_base; - bau_desc += (ITEMS_PER_DESC * bcp->using_desc); + bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu); bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) return NULL; @@ -1153,25 +1119,27 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, * uv_flush_send_and_wait returns 0 if all cpu's were messaged, * or 1 if it gave up and the original cpumask should be returned. */ - if (!uv_flush_send_and_wait(flush_mask, bcp)) + if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc)) return NULL; else return cpumask; } /* - * Search the message queue for any 'other' message with the same software - * acknowledge resource bit vector. + * Search the message queue for any 'other' unprocessed message with the + * same software acknowledge resource bit vector as the 'msg' message. */ struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, - struct bau_control *bcp, unsigned char swack_vec) + struct bau_control *bcp) { struct bau_pq_entry *msg_next = msg + 1; + unsigned char swack_vec = msg->swack_vec; if (msg_next > bcp->queue_last) msg_next = bcp->queue_first; - while ((msg_next->swack_vec != 0) && (msg_next != msg)) { - if (msg_next->swack_vec == swack_vec) + while (msg_next != msg) { + if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) && + (msg_next->swack_vec == swack_vec)) return msg_next; msg_next++; if (msg_next > bcp->queue_last) @@ -1200,32 +1168,30 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) * This message was assigned a swack resource, but no * reserved acknowlegment is pending. * The bug has prevented this message from setting the MMR. - * And no other message has used the same sw_ack resource. - * Do the requested shootdown but do not reply to the msg. - * (the 0 means make no acknowledge) */ - bau_process_message(mdp, bcp, 0); - return; - } - - /* - * Some message has set the MMR 'pending' bit; it might have been - * another message. Look for that message. - */ - other_msg = find_another_by_swack(msg, bcp, msg->swack_vec); - if (other_msg) { - /* There is another. Do not ack the current one. */ - bau_process_message(mdp, bcp, 0); /* - * Let the natural processing of that message acknowledge - * it. Don't get the processing of sw_ack's out of order. + * Some message has set the MMR 'pending' bit; it might have + * been another message. Look for that message. */ - return; + other_msg = find_another_by_swack(msg, bcp); + if (other_msg) { + /* + * There is another. Process this one but do not + * ack it. + */ + bau_process_message(mdp, bcp, 0); + /* + * Let the natural processing of that other message + * acknowledge it. Don't get the processing of sw_ack's + * out of order. + */ + return; + } } /* - * There is no other message using this sw_ack, so it is safe to - * acknowledge it. + * Either the MMR shows this one pending a reply or there is no + * other message using this sw_ack, so it is safe to acknowledge it. */ bau_process_message(mdp, bcp, 1); @@ -1330,7 +1296,8 @@ static void __init enable_timeouts(void) */ mmr_image |= (1L << SOFTACK_MSHIFT); if (is_uv2_hub()) { - mmr_image |= (1L << UV2_EXT_SHFT); + /* hw bug workaround; do not use extended status */ + mmr_image &= ~(1L << UV2_EXT_SHFT); } write_mmr_misc_control(pnode, mmr_image); } @@ -1379,24 +1346,26 @@ static int ptc_seq_show(struct seq_file *file, void *data) cpu = *(loff_t *)data; if (!cpu) { seq_printf(file, - "# cpu bauoff sent stime self locals remotes ncpus localhub "); + "# cpu bauoff sent stime self locals remotes ncpus localhub "); seq_printf(file, "remotehub numuvhubs numuvhubs16 numuvhubs8 "); seq_printf(file, - "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok "); + "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries "); + seq_printf(file, + "rok resetp resett giveup sto bz throt disable "); seq_printf(file, - "resetp resett giveup sto bz throt enters swack recv rtime "); + "enable wars warshw warwaits enters ipidis plugged "); seq_printf(file, - "all one mult none retry canc nocan reset rcan "); + "ipiover glim cong swack recv rtime all one mult "); seq_printf(file, - "disable enable wars warshw warwaits\n"); + "none retry canc nocan reset rcan\n"); } if (cpu < num_possible_cpus() && cpu_online(cpu)) { bcp = &per_cpu(bau_control, cpu); stat = bcp->statp; /* source side statistics */ seq_printf(file, - "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", cpu, bcp->nobau, stat->s_requestor, cycles_2_us(stat->s_time), stat->s_ntargself, stat->s_ntarglocals, @@ -1407,25 +1376,28 @@ static int ptc_seq_show(struct seq_file *file, void *data) stat->s_ntarguvhub8, stat->s_ntarguvhub4, stat->s_ntarguvhub2, stat->s_ntarguvhub1, stat->s_dtimeout, stat->s_strongnacks); - seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld ", + seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", stat->s_retry_messages, stat->s_retriesok, stat->s_resets_plug, stat->s_resets_timeout, stat->s_giveup, stat->s_stimeout, - stat->s_busy, stat->s_throttles, stat->s_enters); + stat->s_busy, stat->s_throttles); + seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + stat->s_bau_disabled, stat->s_bau_reenabled, + stat->s_uv2_wars, stat->s_uv2_wars_hw, + stat->s_uv2_war_waits, stat->s_enters, + stat->s_ipifordisabled, stat->s_plugged, + stat->s_overipilimit, stat->s_giveuplimit, + stat->s_congested); /* destination side statistics */ seq_printf(file, - "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), stat->d_requestee, cycles_2_us(stat->d_time), stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, stat->d_nomsg, stat->d_retries, stat->d_canceled, stat->d_nocanceled, stat->d_resets, stat->d_rcanceled); - seq_printf(file, "%ld %ld %ld %ld %ld\n", - stat->s_bau_disabled, stat->s_bau_reenabled, - stat->s_uv2_wars, stat->s_uv2_wars_hw, - stat->s_uv2_war_waits); } return 0; } @@ -1439,13 +1411,14 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf, char *buf; int ret; - buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", - "max_concur plugged_delay plugsb4reset", - "timeoutsb4reset ipi_reset_limit complete_threshold", - "congested_response_us congested_reps congested_period", + buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n", + "max_concur plugged_delay plugsb4reset timeoutsb4reset", + "ipi_reset_limit complete_threshold congested_response_us", + "congested_reps disabled_period giveup_limit", max_concurr, plugged_delay, plugsb4reset, timeoutsb4reset, ipi_reset_limit, complete_threshold, - congested_respns_us, congested_reps, congested_period); + congested_respns_us, congested_reps, disabled_period, + giveup_limit); if (!buf) return -ENOMEM; @@ -1616,7 +1589,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user, bcp->complete_threshold = complete_threshold; bcp->cong_response_us = congested_respns_us; bcp->cong_reps = congested_reps; - bcp->cong_period = congested_period; + bcp->disabled_period = sec_2_cycles(disabled_period); + bcp->giveup_limit = giveup_limit; } return count; } @@ -1745,6 +1719,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) * fairness chaining multilevel count replied_to */ } else { + /* + * BIOS uses legacy mode, but UV2 hardware always + * uses native mode for selective broadcasts. + */ uv2_hdr = &bd2->header.uv2_hdr; uv2_hdr->swack_flag = 1; uv2_hdr->base_dest_nasid = @@ -1896,10 +1874,11 @@ static void __init init_per_cpu_tunables(void) bcp->complete_threshold = complete_threshold; bcp->cong_response_us = congested_respns_us; bcp->cong_reps = congested_reps; - bcp->cong_period = congested_period; - bcp->clocks_per_100_usec = usec_2_cycles(100); + bcp->disabled_period = sec_2_cycles(disabled_period); + bcp->giveup_limit = giveup_limit; spin_lock_init(&bcp->queue_lock); spin_lock_init(&bcp->uvhub_lock); + spin_lock_init(&bcp->disable_lock); } } @@ -2020,7 +1999,6 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, } bcp->uvhub_master = *hmasterp; bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; - bcp->using_desc = bcp->uvhub_cpu; if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { printk(KERN_EMERG "%d cpus per uvhub invalid\n", bcp->uvhub_cpu); @@ -2123,7 +2101,6 @@ static int __init uv_bau_init(void) } nuvhubs = uv_num_possible_blades(); - spin_lock_init(&disable_lock); congested_cycles = usec_2_cycles(congested_respns_us); uv_base_pnode = 0x7fffffff; -- cgit v1.2.3 From 35a468732289372aab506d7cf73f98f0379888ae Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 21 Jun 2012 17:52:52 +0900 Subject: perf evsel: Fix a build failure on cross compilation The commit c410431cefefd ("perf tools: Reconstruct event with modifiers from perf_event_attr") added the line, but it's broken since it needs to go up 3 directories to get to the kernel root directory, not 2. However host gcc contains /usr/local/include in its search path, so that it can find the perf_event.h in /usr/include. This why we didn't notice the problem yet. But when I tried to cross compile it appears like: CC util/evsel.o util/evsel.c:18:44: error: ../../include/linux/perf_event.h: No such file or directory make: *** [util/evsel.o] Error 1 Looking at the source, it isn't needed at all as evsel.h already included the perf_event.h. So simply remove it would solve the problem. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1340268772-5737-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 876f639d69ed..3d1f6968f175 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -15,7 +15,6 @@ #include "cpumap.h" #include "thread_map.h" #include "target.h" -#include "../../include/linux/perf_event.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0)) -- cgit v1.2.3 From 7a25b2d32b9cb0b813d56ee6109acf90f3c9f1e5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 21 Jun 2012 12:25:16 +0200 Subject: perf test: Fix parse events test to follow proper raw event name Following commit changed raw event names to carry event modificator. perf evsel: Reconstruct raw event with modifiers from perf_event_attr commit 6eef3d9c2bcf52b7a3c18e609f5838c007b989a4 Author: Arnaldo Carvalho de Melo The perf_evsel__name function now returns ':mod' suffix for raw events, so we need to follow that in current tests. All tests pass now for 'perf test parse' suite. Signed-off-by: Jiri Olsa Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1340274316-5161-1-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events-test.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 229af6da33a2..a0f61a2a6835 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -413,19 +413,20 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist) { struct perf_evsel *evsel; - /* cpu/config=1,name=krava1/u */ + /* cpu/config=1,name=krava/u */ evsel = list_entry(evlist->entries.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava")); - /* cpu/config=2/" */ + /* cpu/config=2/u" */ evsel = list_entry(evsel->node.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config); - TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "raw 0x2")); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "raw 0x2:u")); return 0; } -- cgit v1.2.3 From 4069d6f86bebce1a1e3456ef721511b4b81958f8 Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Sat, 9 Jun 2012 13:18:08 +0900 Subject: sony-laptop: use an enum for SNC event types Signed-off-by: Mattia Dongili Signed-off-by: Matthew Garrett --- drivers/platform/x86/sony-laptop.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 210d4ae547c2..615ab06b6cd3 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1172,6 +1172,10 @@ static int sony_nc_hotkeys_decode(u32 event, unsigned int handle) /* * ACPI callbacks */ +enum event_types { + HOTKEY = 1, + KILLSWITCH +}; static void sony_nc_notify(struct acpi_device *device, u32 event) { u32 real_ev = event; @@ -1196,7 +1200,7 @@ static void sony_nc_notify(struct acpi_device *device, u32 event) /* hotkey event */ case 0x0100: case 0x0127: - ev_type = 1; + ev_type = HOTKEY; real_ev = sony_nc_hotkeys_decode(event, handle); if (real_ev > 0) @@ -1216,7 +1220,7 @@ static void sony_nc_notify(struct acpi_device *device, u32 event) * update the rfkill device status when the * switch is moved. */ - ev_type = 2; + ev_type = KILLSWITCH; sony_call_snc_handle(handle, 0x0100, &result); real_ev = result & 0x03; @@ -1238,7 +1242,7 @@ static void sony_nc_notify(struct acpi_device *device, u32 event) } else { /* old style event */ - ev_type = 1; + ev_type = HOTKEY; sony_laptop_report_input_event(real_ev); } -- cgit v1.2.3 From bb384b5295323ed58260aeaff22d8bbe32988396 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Sat, 9 Jun 2012 13:18:09 +0900 Subject: sony-laptop: notify userspace of GFX switch position changes Some Vaios come with both integrated and discrete graphics, plus a switch for choosing one of the two. When the switch position is changed, a notification is generated. Signed-off-by: Marco Chiappero Signed-off-by: Mattia Dongili Signed-off-by: Matthew Garrett --- drivers/platform/x86/sony-laptop.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 615ab06b6cd3..4f42e5661bf5 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1174,7 +1174,8 @@ static int sony_nc_hotkeys_decode(u32 event, unsigned int handle) */ enum event_types { HOTKEY = 1, - KILLSWITCH + KILLSWITCH, + GFX_SWITCH }; static void sony_nc_notify(struct acpi_device *device, u32 event) { @@ -1230,6 +1231,24 @@ static void sony_nc_notify(struct acpi_device *device, u32 event) break; + case 0x0128: + case 0x0146: + /* Hybrid GFX switching */ + sony_call_snc_handle(handle, 0x0000, &result); + dprintk("GFX switch event received (reason: %s)\n", + (result & 0x01) ? + "switch change" : "unknown"); + + /* verify the switch state + * 1: discrete GFX + * 0: integrated GFX + */ + sony_call_snc_handle(handle, 0x0100, &result); + + ev_type = GFX_SWITCH; + real_ev = result & 0xff; + break; + default: dprintk("Unknown event 0x%x for handle 0x%x\n", event, handle); -- cgit v1.2.3 From 15aa5c75468a103cdee1a0e0ec26aad979bf71a5 Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Mon, 11 Jun 2012 07:18:31 +0900 Subject: sony-laptop: store battery care limits on batteries Some models offer the option to store the limits on the battery (firmware?). Signed-off-by: Mattia Dongili Signed-off-by: Matthew Garrett --- drivers/platform/x86/sony-laptop.c | 41 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 4f42e5661bf5..f045e3e59cd6 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1916,32 +1916,33 @@ static ssize_t sony_nc_battery_care_limit_store(struct device *dev, * bits 4,5: store the limit into the EC * bits 6,7: store the limit into the battery */ + cmd = 0; - /* - * handle 0x0115 should allow storing on battery too; - * handle 0x0136 same as 0x0115 + health status; - * handle 0x013f, same as 0x0136 but no storing on the battery - * - * Store only inside the EC for now, regardless the handle number - */ - if (value == 0) - /* disable limits */ - cmd = 0x0; + if (value > 0) { + if (value <= 50) + cmd = 0x20; - else if (value <= 50) - cmd = 0x21; + else if (value <= 80) + cmd = 0x10; - else if (value <= 80) - cmd = 0x11; + else if (value <= 100) + cmd = 0x30; + + else + return -EINVAL; - else if (value <= 100) - cmd = 0x31; + /* + * handle 0x0115 should allow storing on battery too; + * handle 0x0136 same as 0x0115 + health status; + * handle 0x013f, same as 0x0136 but no storing on the battery + */ + if (bcare_ctl->handle != 0x013f) + cmd = cmd | (cmd << 2); - else - return -EINVAL; + cmd = (cmd | 0x1) << 0x10; + } - if (sony_call_snc_handle(bcare_ctl->handle, (cmd << 0x10) | 0x0100, - &result)) + if (sony_call_snc_handle(bcare_ctl->handle, cmd | 0x0100, &result)) return -EIO; return count; -- cgit v1.2.3 From 014fc8fbece33d42e2aa92d289fffa213a159321 Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Sat, 9 Jun 2012 13:18:11 +0900 Subject: sony-laptop: add lid backlight support for handle 0x143 Signed-off-by: Mattia Dongili Signed-off-by: Matthew Garrett --- drivers/platform/x86/sony-laptop.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index f045e3e59cd6..a94e13fe1f42 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1010,6 +1010,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev, struct sony_backlight_props { struct backlight_device *dev; int handle; + int cmd_base; u8 offset; u8 maxlvl; }; @@ -1037,7 +1038,7 @@ static int sony_nc_get_brightness_ng(struct backlight_device *bd) struct sony_backlight_props *sdev = (struct sony_backlight_props *)bl_get_data(bd); - sony_call_snc_handle(sdev->handle, 0x0200, &result); + sony_call_snc_handle(sdev->handle, sdev->cmd_base + 0x100, &result); return (result & 0xff) - sdev->offset; } @@ -1049,7 +1050,8 @@ static int sony_nc_update_status_ng(struct backlight_device *bd) (struct sony_backlight_props *)bl_get_data(bd); value = bd->props.brightness + sdev->offset; - if (sony_call_snc_handle(sdev->handle, 0x0100 | (value << 16), &result)) + if (sony_call_snc_handle(sdev->handle, sdev->cmd_base | (value << 0x10), + &result)) return -EIO; return value; @@ -2496,6 +2498,7 @@ static void sony_nc_backlight_ng_read_limits(int handle, { u64 offset; int i; + int lvl_table_len = 0; u8 min = 0xff, max = 0x00; unsigned char buffer[32] = { 0 }; @@ -2515,11 +2518,21 @@ static void sony_nc_backlight_ng_read_limits(int handle, if (i < 0) return; + switch (handle) { + case 0x012f: + case 0x0137: + lvl_table_len = 9; + break; + case 0x143: + lvl_table_len = 16; + break; + } + /* the buffer lists brightness levels available, brightness levels are * from position 0 to 8 in the array, other values are used by ALS * control. */ - for (i = 0; i < 9 && i < ARRAY_SIZE(buffer); i++) { + for (i = 0; i < lvl_table_len && i < ARRAY_SIZE(buffer); i++) { dprintk("Brightness level: %d\n", buffer[i]); @@ -2546,14 +2559,22 @@ static void sony_nc_backlight_setup(void) if (sony_find_snc_handle(0x12f) != -1) { ops = &sony_backlight_ng_ops; + sony_bl_props.cmd_base = 0x0100; sony_nc_backlight_ng_read_limits(0x12f, &sony_bl_props); max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; } else if (sony_find_snc_handle(0x137) != -1) { ops = &sony_backlight_ng_ops; + sony_bl_props.cmd_base = 0x0100; sony_nc_backlight_ng_read_limits(0x137, &sony_bl_props); max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; + } else if (sony_find_snc_handle(0x143) != -1) { + ops = &sony_backlight_ng_ops; + sony_bl_props.cmd_base = 0x3000; + sony_nc_backlight_ng_read_limits(0x143, &sony_bl_props); + max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; + } else if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "GBRT", &unused))) { ops = &sony_backlight_ops; -- cgit v1.2.3 From ca3c2c706de39b3400e57254dce054bf7350efa2 Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Sat, 9 Jun 2012 13:18:12 +0900 Subject: sony-laptop: input initialization should be done before SNC SNC needs input devices so better have those ready before starting handle events. Signed-off-by: Mattia Dongili Signed-off-by: Matthew Garrett --- drivers/platform/x86/sony-laptop.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index a94e13fe1f42..89ff6d845f34 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -2642,6 +2642,12 @@ static int sony_nc_add(struct acpi_device *device) } } + result = sony_laptop_setup_input(device); + if (result) { + pr_err("Unable to create input devices\n"); + goto outplatform; + } + if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "ECON", &handle))) { int arg = 1; @@ -2659,12 +2665,6 @@ static int sony_nc_add(struct acpi_device *device) } /* setup input devices and helper fifo */ - result = sony_laptop_setup_input(device); - if (result) { - pr_err("Unable to create input devices\n"); - goto outsnc; - } - if (acpi_video_backlight_support()) { pr_info("brightness ignored, must be controlled by ACPI video driver\n"); } else { @@ -2712,22 +2712,21 @@ static int sony_nc_add(struct acpi_device *device) return 0; - out_sysfs: +out_sysfs: for (item = sony_nc_values; item->name; ++item) { device_remove_file(&sony_pf_device->dev, &item->devattr); } sony_nc_backlight_cleanup(); - - sony_laptop_remove_input(); - - outsnc: sony_nc_function_cleanup(sony_pf_device); sony_nc_handles_cleanup(sony_pf_device); - outpresent: +outplatform: + sony_laptop_remove_input(); + +outpresent: sony_pf_remove(); - outwalk: +outwalk: sony_nc_rfkill_cleanup(); return result; } -- cgit v1.2.3 From c7a2918373983b32db3ca35823d930641747e26f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 9 Jun 2012 13:18:13 +0900 Subject: sony-laptop: fix sony_nc_sysfs_store() We made this an unsigned long and it causes a bug on 64 bit big endian systems when we try to pass the value to sony_nc_int_call(). Also value has to be signed because validate() returns negative error codes. Signed-off-by: Dan Carpenter Signed-off-by: Mattia Dongili Signed-off-by: Matthew Garrett --- drivers/platform/x86/sony-laptop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 89ff6d845f34..78e6389d2cae 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -973,7 +973,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev, struct device_attribute *attr, const char *buffer, size_t count) { - unsigned long value = 0; + int value; int ret = 0; struct sony_nc_value *item = container_of(attr, struct sony_nc_value, devattr); @@ -984,7 +984,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev, if (count > 31) return -EINVAL; - if (kstrtoul(buffer, 10, &value)) + if (kstrtoint(buffer, 10, &value)) return -EINVAL; if (item->validate) @@ -994,7 +994,7 @@ static ssize_t sony_nc_sysfs_store(struct device *dev, return value; ret = sony_nc_int_call(sony_nc_acpi_handle, *item->acpiset, - (int *)&value, NULL); + &value, NULL); if (ret < 0) return -EIO; -- cgit v1.2.3 From 56f4a9f76d8ce7c7cef92905c909aad0c7e5c9db Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 9 Jun 2012 13:18:14 +0900 Subject: sony-laptop: fix a couple signedness bugs This needs to be signed to handle negative error codes. Remove a redundant check, read_limits is always called with a valid handle. Signed-off-by: Dan Carpenter Signed-off-by: Mattia Dongili Signed-off-by: Matthew Garrett --- drivers/platform/x86/sony-laptop.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 78e6389d2cae..b9499b65c1cd 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -2139,7 +2139,7 @@ static ssize_t sony_nc_thermal_mode_show(struct device *dev, struct device_attribute *attr, char *buffer) { ssize_t count = 0; - unsigned int mode = sony_nc_thermal_mode_get(); + int mode = sony_nc_thermal_mode_get(); if (mode < 0) return mode; @@ -2507,8 +2507,6 @@ static void sony_nc_backlight_ng_read_limits(int handle, props->maxlvl = 0xff; offset = sony_find_snc_handle(handle); - if (offset < 0) - return; /* try to read the boundaries from ACPI tables, if we fail the above * defaults should be reasonable -- cgit v1.2.3 From a1071a5abf1f4d4a7f8324e21b8b32b1342013c7 Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Thu, 14 Jun 2012 06:36:02 +0900 Subject: sony-laptop: correct find_snc_handle failure checks Since bab7084c745bf4d75b760728387f375fd34dc683, find_snc_handle returns -EINVAL, not -1. Signed-off-by: Mattia Dongili Signed-off-by: Matthew Garrett --- drivers/platform/x86/sony-laptop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index b9499b65c1cd..d456ff0c73b7 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -2555,19 +2555,19 @@ static void sony_nc_backlight_setup(void) const struct backlight_ops *ops = NULL; struct backlight_properties props; - if (sony_find_snc_handle(0x12f) != -1) { + if (sony_find_snc_handle(0x12f) >= 0) { ops = &sony_backlight_ng_ops; sony_bl_props.cmd_base = 0x0100; sony_nc_backlight_ng_read_limits(0x12f, &sony_bl_props); max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; - } else if (sony_find_snc_handle(0x137) != -1) { + } else if (sony_find_snc_handle(0x137) >= 0) { ops = &sony_backlight_ng_ops; sony_bl_props.cmd_base = 0x0100; sony_nc_backlight_ng_read_limits(0x137, &sony_bl_props); max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; - } else if (sony_find_snc_handle(0x143) != -1) { + } else if (sony_find_snc_handle(0x143) >= 0) { ops = &sony_backlight_ng_ops; sony_bl_props.cmd_base = 0x3000; sony_nc_backlight_ng_read_limits(0x143, &sony_bl_props); -- cgit v1.2.3 From 57f9616b79549e772cf4dd3aa1d2df5b6c8acdfa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 Jun 2012 19:28:50 +0300 Subject: ideapad: uninitialized data in ideapad_acpi_add() We only initialize the high bits of "cfg". It probably doesn't cause a problem given that this is platform specific code and doesn't have to worry about endianness etc. But it's sort of messy. Signed-off-by: Dan Carpenter Signed-off-by: Matthew Garrett --- drivers/platform/x86/ideapad-laptop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 4f20f8dd3d7c..17f6dfd8dbfb 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -694,10 +694,10 @@ MODULE_DEVICE_TABLE(acpi, ideapad_device_ids); static int __devinit ideapad_acpi_add(struct acpi_device *adevice) { int ret, i; - unsigned long cfg; + int cfg; struct ideapad_private *priv; - if (read_method_int(adevice->handle, "_CFG", (int *)&cfg)) + if (read_method_int(adevice->handle, "_CFG", &cfg)) return -ENODEV; priv = kzalloc(sizeof(*priv), GFP_KERNEL); @@ -721,7 +721,7 @@ static int __devinit ideapad_acpi_add(struct acpi_device *adevice) goto input_failed; for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) { - if (test_bit(ideapad_rfk_data[i].cfgbit, &cfg)) + if (test_bit(ideapad_rfk_data[i].cfgbit, &priv->cfg)) ideapad_register_rfkill(adevice, i); else priv->rfk[i] = NULL; -- cgit v1.2.3 From 88ca518b0bb4161e5f20f8a1d9cc477cae294e54 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 25 Jun 2012 15:07:17 +0200 Subject: intel_ips: blacklist HP ProBook laptops intel_ips driver spews the warning message "ME failed to update for more than 1s, likely hung" at each second endlessly on HP ProBook laptops with IronLake. As this has never worked, better to blacklist the driver for now. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_ips.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 0ffdb3cde2bb..9af4257d4901 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -1485,6 +1486,24 @@ static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = { MODULE_DEVICE_TABLE(pci, ips_id_table); +static int ips_blacklist_callback(const struct dmi_system_id *id) +{ + pr_info("Blacklisted intel_ips for %s\n", id->ident); + return 1; +} + +static const struct dmi_system_id ips_blacklist[] = { + { + .callback = ips_blacklist_callback, + .ident = "HP ProBook", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"), + }, + }, + { } /* terminating entry */ +}; + static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) { u64 platform_info; @@ -1494,6 +1513,9 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) u16 htshi, trc, trc_required_mask; u8 tse; + if (dmi_check_system(ips_blacklist)) + return -ENODEV; + ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL); if (!ips) return -ENOMEM; -- cgit v1.2.3 From da3789628f88684d3f0fb4e6a6bc086c395ac3cb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 27 Jun 2012 13:08:42 -0300 Subject: perf tools: Stop using a global trace events description list The pevent thing is per perf.data file, so I made it stop being static and become a perf_session member, so tools processing perf.data files use perf_session and _there_ we read the trace events description into session->pevent and then change everywhere to stop using that single global pevent variable and use the per session one. Note that it _doesn't_ fall backs to trace__event_id, as we're not interested at all in what is present in the /sys/kernel/debug/tracing/events in the workstation doing the analysis, just in what is in the perf.data file. This patch also introduces perf_session__set_tracepoints_handlers that is the perf perf.data/session way to associate handlers to tracepoint events by resolving their IDs using the events descriptions stored in a perf.data file. Make 'perf sched' use it. Reported-by: Dmitry Antipov Tested-by: Dmitry Antipov Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: linaro-dev@lists.linaro.org Cc: patches@linaro.org Link: http://lkml.kernel.org/r/20120625232016.GA28525@infradead.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 37 ++++++--- tools/perf/builtin-lock.c | 4 +- tools/perf/builtin-sched.c | 36 +++++--- tools/perf/builtin-script.c | 66 ++++++++++----- tools/perf/util/evlist.c | 4 +- tools/perf/util/evlist.h | 3 + tools/perf/util/header.c | 31 +++---- .../perf/util/scripting-engines/trace-event-perl.c | 28 ++++--- .../util/scripting-engines/trace-event-python.c | 21 +++-- tools/perf/util/session.c | 56 +++++++++++++ tools/perf/util/session.h | 10 +++ tools/perf/util/trace-event-parse.c | 58 ++++++------- tools/perf/util/trace-event-read.c | 97 +++++++++++----------- tools/perf/util/trace-event-scripting.c | 7 +- tools/perf/util/trace-event.h | 38 +++++---- 15 files changed, 314 insertions(+), 182 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 547af48deb4f..ce35015f2dc6 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -57,6 +57,11 @@ static unsigned long nr_allocs, nr_cross_allocs; #define PATH_SYS_NODE "/sys/devices/system/node" +struct perf_kmem { + struct perf_tool tool; + struct perf_session *session; +}; + static void init_cpunode_map(void) { FILE *fp; @@ -278,14 +283,16 @@ static void process_free_event(void *data, s_alloc->alloc_cpu = -1; } -static void process_raw_event(union perf_event *raw_event __used, void *data, +static void process_raw_event(struct perf_tool *tool, + union perf_event *raw_event __used, void *data, int cpu, u64 timestamp, struct thread *thread) { + struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool); struct event_format *event; int type; - type = trace_parse_common_type(data); - event = trace_find_event(type); + type = trace_parse_common_type(kmem->session->pevent, data); + event = pevent_find_event(kmem->session->pevent, type); if (!strcmp(event->name, "kmalloc") || !strcmp(event->name, "kmem_cache_alloc")) { @@ -306,7 +313,7 @@ static void process_raw_event(union perf_event *raw_event __used, void *data, } } -static int process_sample_event(struct perf_tool *tool __used, +static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel __used, @@ -322,16 +329,18 @@ static int process_sample_event(struct perf_tool *tool __used, dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - process_raw_event(event, sample->raw_data, sample->cpu, + process_raw_event(tool, event, sample->raw_data, sample->cpu, sample->time, thread); return 0; } -static struct perf_tool perf_kmem = { - .sample = process_sample_event, - .comm = perf_event__process_comm, - .ordered_samples = true, +static struct perf_kmem perf_kmem = { + .tool = { + .sample = process_sample_event, + .comm = perf_event__process_comm, + .ordered_samples = true, + }, }; static double fragmentation(unsigned long n_req, unsigned long n_alloc) @@ -486,11 +495,15 @@ static void sort_result(void) static int __cmd_kmem(void) { int err = -EINVAL; - struct perf_session *session = perf_session__new(input_name, O_RDONLY, - 0, false, &perf_kmem); + struct perf_session *session; + + session = perf_session__new(input_name, O_RDONLY, 0, false, + &perf_kmem.tool); if (session == NULL) return -ENOMEM; + perf_kmem.session = session; + if (perf_session__create_kernel_maps(session) < 0) goto out_delete; @@ -498,7 +511,7 @@ static int __cmd_kmem(void) goto out_delete; setup_pager(); - err = perf_session__process_events(session, &perf_kmem); + err = perf_session__process_events(session, &perf_kmem.tool); if (err != 0) goto out_delete; sort_result(); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index fd53319de20d..b3c428548868 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -724,8 +724,8 @@ process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread) struct event_format *event; int type; - type = trace_parse_common_type(data); - event = trace_find_event(type); + type = trace_parse_common_type(session->pevent, data); + event = pevent_find_event(session->pevent, type); if (!strcmp(event->name, "lock_acquire")) process_lock_acquire_event(data, event, cpu, timestamp, thread); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 9fe77b185338..7a9ad2b1ee76 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -43,6 +43,11 @@ static u64 sleep_measurement_overhead; static unsigned long nr_tasks; +struct perf_sched { + struct perf_tool tool; + struct perf_session *session; +}; + struct sched_atom; struct task_desc { @@ -1597,6 +1602,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, struct perf_evsel *evsel, struct machine *machine) { + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + struct pevent *pevent = sched->session->pevent; struct thread *thread = machine__findnew_thread(machine, sample->pid); if (thread == NULL) { @@ -1612,7 +1619,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, tracepoint_handler f = evsel->handler.func; if (evsel->handler.data == NULL) - evsel->handler.data = trace_find_event(evsel->attr.config); + evsel->handler.data = pevent_find_event(pevent, + evsel->attr.config); f(tool, evsel->handler.data, sample, machine, thread); } @@ -1620,12 +1628,14 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, return 0; } -static struct perf_tool perf_sched = { - .sample = perf_sched__process_tracepoint_sample, - .comm = perf_event__process_comm, - .lost = perf_event__process_lost, - .fork = perf_event__process_task, - .ordered_samples = true, +static struct perf_sched perf_sched = { + .tool = { + .sample = perf_sched__process_tracepoint_sample, + .comm = perf_event__process_comm, + .lost = perf_event__process_lost, + .fork = perf_event__process_task, + .ordered_samples = true, + }, }; static void read_events(bool destroy, struct perf_session **psession) @@ -1640,16 +1650,20 @@ static void read_events(bool destroy, struct perf_session **psession) { "sched:sched_process_exit", process_sched_exit_event, }, { "sched:sched_migrate_task", process_sched_migrate_task_event, }, }; - struct perf_session *session = perf_session__new(input_name, O_RDONLY, - 0, false, &perf_sched); + struct perf_session *session; + + session = perf_session__new(input_name, O_RDONLY, 0, false, + &perf_sched.tool); if (session == NULL) die("No Memory"); - err = perf_evlist__set_tracepoints_handlers_array(session->evlist, handlers); + perf_sched.session = session; + + err = perf_session__set_tracepoints_handlers(session, handlers); assert(err == 0); if (perf_session__has_traces(session, "record -R")) { - err = perf_session__process_events(session, &perf_sched); + err = perf_session__process_events(session, &perf_sched.tool); if (err) die("Failed to process events, error %d", err); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8fecd3b8130a..1e60ab70b2b1 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -28,6 +28,11 @@ static bool system_wide; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +struct perf_script { + struct perf_tool tool; + struct perf_session *session; +}; + enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, PERF_OUTPUT_TID = 1U << 1, @@ -257,7 +262,8 @@ static int perf_session__check_output_opt(struct perf_session *session) return 0; } -static void print_sample_start(struct perf_sample *sample, +static void print_sample_start(struct pevent *pevent, + struct perf_sample *sample, struct thread *thread, struct perf_evsel *evsel) { @@ -302,8 +308,14 @@ static void print_sample_start(struct perf_sample *sample, if (PRINT_FIELD(EVNAME)) { if (attr->type == PERF_TYPE_TRACEPOINT) { - type = trace_parse_common_type(sample->raw_data); - event = trace_find_event(type); + /* + * XXX Do we really need this here? + * perf_evlist__set_tracepoint_names should have done + * this already + */ + type = trace_parse_common_type(pevent, + sample->raw_data); + event = pevent_find_event(pevent, type); if (event) evname = event->name; } else @@ -404,6 +416,7 @@ static void print_sample_bts(union perf_event *event, } static void process_event(union perf_event *event __unused, + struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, @@ -414,7 +427,7 @@ static void process_event(union perf_event *event __unused, if (output[attr->type].fields == 0) return; - print_sample_start(sample, thread, evsel); + print_sample_start(pevent, sample, thread, evsel); if (is_bts_event(attr)) { print_sample_bts(event, sample, evsel, machine, thread); @@ -422,7 +435,7 @@ static void process_event(union perf_event *event __unused, } if (PRINT_FIELD(TRACE)) - print_trace_event(sample->cpu, sample->raw_data, + print_trace_event(pevent, sample->cpu, sample->raw_data, sample->raw_size); if (PRINT_FIELD(ADDR)) @@ -453,7 +466,8 @@ static int default_stop_script(void) return 0; } -static int default_generate_script(const char *outfile __unused) +static int default_generate_script(struct pevent *pevent __unused, + const char *outfile __unused) { return 0; } @@ -491,6 +505,7 @@ static int process_sample_event(struct perf_tool *tool __used, struct machine *machine) { struct addr_location al; + struct perf_script *scr = container_of(tool, struct perf_script, tool); struct thread *thread = machine__findnew_thread(machine, event->ip.tid); if (thread == NULL) { @@ -522,24 +537,27 @@ static int process_sample_event(struct perf_tool *tool __used, if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) return 0; - scripting_ops->process_event(event, sample, evsel, machine, thread); + scripting_ops->process_event(event, scr->session->pevent, + sample, evsel, machine, thread); evsel->hists.stats.total_period += sample->period; return 0; } -static struct perf_tool perf_script = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .comm = perf_event__process_comm, - .exit = perf_event__process_task, - .fork = perf_event__process_task, - .attr = perf_event__process_attr, - .event_type = perf_event__process_event_type, - .tracing_data = perf_event__process_tracing_data, - .build_id = perf_event__process_build_id, - .ordered_samples = true, - .ordering_requires_timestamps = true, +static struct perf_script perf_script = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .attr = perf_event__process_attr, + .event_type = perf_event__process_event_type, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_samples = true, + .ordering_requires_timestamps = true, + }, }; extern volatile int session_done; @@ -555,7 +573,7 @@ static int __cmd_script(struct perf_session *session) signal(SIGINT, sig_handler); - ret = perf_session__process_events(session, &perf_script); + ret = perf_session__process_events(session, &perf_script.tool); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); @@ -1337,10 +1355,13 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (!script_name) setup_pager(); - session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_script); + session = perf_session__new(input_name, O_RDONLY, 0, false, + &perf_script.tool); if (session == NULL) return -ENOMEM; + perf_script.session = session; + if (cpu_list) { if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap)) return -1; @@ -1386,7 +1407,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) return -1; } - err = scripting_ops->generate_script("perf-script"); + err = scripting_ops->generate_script(session->pevent, + "perf-script"); goto out; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 7400fb3fc50c..f74e9560350e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -224,8 +224,8 @@ out_free_attrs: return err; } -static struct perf_evsel * - perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) +struct perf_evsel * +perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) { struct perf_evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 989bee9624c2..40d4d3cdced0 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -73,6 +73,9 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, #define perf_evlist__set_tracepoints_handlers_array(evlist, array) \ perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) +struct perf_evsel * +perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); + void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, u64 id); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a5e2015319ee..5a47aba46759 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1474,15 +1474,15 @@ out: static int process_tracing_data(struct perf_file_section *section __unused, struct perf_header *ph __unused, - int feat __unused, int fd) + int feat __unused, int fd, void *data) { - trace_report(fd, false); + trace_report(fd, data, false); return 0; } static int process_build_id(struct perf_file_section *section, struct perf_header *ph, - int feat __unused, int fd) + int feat __unused, int fd, void *data __used) { if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) pr_debug("Failed to read buildids, continuing...\n"); @@ -1493,7 +1493,7 @@ struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); int (*process)(struct perf_file_section *section, - struct perf_header *h, int feat, int fd); + struct perf_header *h, int feat, int fd, void *data); const char *name; bool full_only; }; @@ -1988,7 +1988,7 @@ int perf_file_header__read(struct perf_file_header *header, static int perf_file_section__process(struct perf_file_section *section, struct perf_header *ph, - int feat, int fd, void *data __used) + int feat, int fd, void *data) { if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { pr_debug("Failed to lseek to %" PRIu64 " offset for feature " @@ -2004,7 +2004,7 @@ static int perf_file_section__process(struct perf_file_section *section, if (!feat_ops[feat].process) return 0; - return feat_ops[feat].process(section, ph, feat, fd); + return feat_ops[feat].process(section, ph, feat, fd, data); } static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, @@ -2093,9 +2093,11 @@ static int read_attr(int fd, struct perf_header *ph, return ret <= 0 ? -1 : 0; } -static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel) +static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel, + struct pevent *pevent) { - struct event_format *event = trace_find_event(evsel->attr.config); + struct event_format *event = pevent_find_event(pevent, + evsel->attr.config); char bf[128]; if (event == NULL) @@ -2109,13 +2111,14 @@ static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel) return 0; } -static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist) +static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist, + struct pevent *pevent) { struct perf_evsel *pos; list_for_each_entry(pos, &evlist->entries, node) { if (pos->attr.type == PERF_TYPE_TRACEPOINT && - perf_evsel__set_tracepoint_name(pos)) + perf_evsel__set_tracepoint_name(pos, pevent)) return -1; } @@ -2198,12 +2201,12 @@ int perf_session__read_header(struct perf_session *session, int fd) event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } - perf_header__process_sections(header, fd, NULL, + perf_header__process_sections(header, fd, &session->pevent, perf_file_section__process); lseek(fd, header->data_offset, SEEK_SET); - if (perf_evlist__set_tracepoint_names(session->evlist)) + if (perf_evlist__set_tracepoint_names(session->evlist, session->pevent)) goto out_delete_evlist; header->frozen = 1; @@ -2419,8 +2422,8 @@ int perf_event__process_tracing_data(union perf_event *event, lseek(session->fd, offset + sizeof(struct tracing_data_event), SEEK_SET); - size_read = trace_report(session->fd, session->repipe); - + size_read = trace_report(session->fd, &session->pevent, + session->repipe); padding = ALIGN(size_read, sizeof(u64)) - size_read; if (read(session->fd, buf, padding) < 0) diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 4c1b3d72a1d2..b3620fe12763 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -233,7 +233,8 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static inline struct event_format *find_cache_event(int type) +static inline +struct event_format *find_cache_event(struct pevent *pevent, int type) { static char ev_name[256]; struct event_format *event; @@ -241,7 +242,7 @@ static inline struct event_format *find_cache_event(int type) if (events[type]) return events[type]; - events[type] = event = trace_find_event(type); + events[type] = event = pevent_find_event(pevent, type); if (!event) return NULL; @@ -252,7 +253,8 @@ static inline struct event_format *find_cache_event(int type) return event; } -static void perl_process_tracepoint(union perf_event *pevent __unused, +static void perl_process_tracepoint(union perf_event *perf_event __unused, + struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __unused, @@ -275,13 +277,13 @@ static void perl_process_tracepoint(union perf_event *pevent __unused, if (evsel->attr.type != PERF_TYPE_TRACEPOINT) return; - type = trace_parse_common_type(data); + type = trace_parse_common_type(pevent, data); - event = find_cache_event(type); + event = find_cache_event(pevent, type); if (!event) die("ug! no event found for type %d", type); - pid = trace_parse_common_pid(data); + pid = trace_parse_common_pid(pevent, data); sprintf(handler, "%s::%s", event->system, event->name); @@ -314,7 +316,8 @@ static void perl_process_tracepoint(union perf_event *pevent __unused, offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); } else { /* FIELD_IS_NUMERIC */ - val = read_size(data + field->offset, field->size); + val = read_size(pevent, data + field->offset, + field->size); if (field->flags & FIELD_IS_SIGNED) { XPUSHs(sv_2mortal(newSViv(val))); } else { @@ -368,14 +371,15 @@ static void perl_process_event_generic(union perf_event *pevent __unused, LEAVE; } -static void perl_process_event(union perf_event *pevent, +static void perl_process_event(union perf_event *event, + struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, struct thread *thread) { - perl_process_tracepoint(pevent, sample, evsel, machine, thread); - perl_process_event_generic(pevent, sample, evsel, machine, thread); + perl_process_tracepoint(event, pevent, sample, evsel, machine, thread); + perl_process_event_generic(event, sample, evsel, machine, thread); } static void run_start_sub(void) @@ -448,7 +452,7 @@ static int perl_stop_script(void) return 0; } -static int perl_generate_script(const char *outfile) +static int perl_generate_script(struct pevent *pevent, const char *outfile) { struct event_format *event = NULL; struct format_field *f; @@ -495,7 +499,7 @@ static int perl_generate_script(const char *outfile) fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n"); fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n"); - while ((event = trace_find_next_event(event))) { + while ((event = trace_find_next_event(pevent, event))) { fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name); fprintf(ofp, "\tmy ("); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index acb9795286c4..a8ca2f8179a9 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -190,7 +190,8 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static inline struct event_format *find_cache_event(int type) +static inline +struct event_format *find_cache_event(struct pevent *pevent, int type) { static char ev_name[256]; struct event_format *event; @@ -198,7 +199,7 @@ static inline struct event_format *find_cache_event(int type) if (events[type]) return events[type]; - events[type] = event = trace_find_event(type); + events[type] = event = pevent_find_event(pevent, type); if (!event) return NULL; @@ -209,7 +210,8 @@ static inline struct event_format *find_cache_event(int type) return event; } -static void python_process_event(union perf_event *pevent __unused, +static void python_process_event(union perf_event *perf_event __unused, + struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel __unused, struct machine *machine __unused, @@ -233,13 +235,13 @@ static void python_process_event(union perf_event *pevent __unused, if (!t) Py_FatalError("couldn't create Python tuple"); - type = trace_parse_common_type(data); + type = trace_parse_common_type(pevent, data); - event = find_cache_event(type); + event = find_cache_event(pevent, type); if (!event) die("ug! no event found for type %d", type); - pid = trace_parse_common_pid(data); + pid = trace_parse_common_pid(pevent, data); sprintf(handler_name, "%s__%s", event->system, event->name); @@ -284,7 +286,8 @@ static void python_process_event(union perf_event *pevent __unused, offset = field->offset; obj = PyString_FromString((char *)data + offset); } else { /* FIELD_IS_NUMERIC */ - val = read_size(data + field->offset, field->size); + val = read_size(pevent, data + field->offset, + field->size); if (field->flags & FIELD_IS_SIGNED) { if ((long long)val >= LONG_MIN && (long long)val <= LONG_MAX) @@ -438,7 +441,7 @@ out: return err; } -static int python_generate_script(const char *outfile) +static int python_generate_script(struct pevent *pevent, const char *outfile) { struct event_format *event = NULL; struct format_field *f; @@ -487,7 +490,7 @@ static int python_generate_script(const char *outfile) fprintf(ofp, "def trace_end():\n"); fprintf(ofp, "\tprint \"in trace_end\"\n\n"); - while ((event = trace_find_next_event(event))) { + while ((event = trace_find_next_event(pevent, event))) { fprintf(ofp, "def %s__%s(", event->system, event->name); fprintf(ofp, "event_name, "); fprintf(ofp, "context, "); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 6b305fbcc986..f5baff1495e6 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -14,6 +14,7 @@ #include "sort.h" #include "util.h" #include "cpumap.h" +#include "event-parse.h" static int perf_session__open(struct perf_session *self, bool force) { @@ -1610,3 +1611,58 @@ void perf_session__fprintf_info(struct perf_session *session, FILE *fp, perf_header__fprintf_info(session, fp, full); fprintf(fp, "# ========\n#\n"); } + + +int __perf_session__set_tracepoints_handlers(struct perf_session *session, + const struct perf_evsel_str_handler *assocs, + size_t nr_assocs) +{ + struct perf_evlist *evlist = session->evlist; + struct event_format *format; + struct perf_evsel *evsel; + char *tracepoint, *name; + size_t i; + int err; + + for (i = 0; i < nr_assocs; i++) { + err = -ENOMEM; + tracepoint = strdup(assocs[i].name); + if (tracepoint == NULL) + goto out; + + err = -ENOENT; + name = strchr(tracepoint, ':'); + if (name == NULL) + goto out_free; + + *name++ = '\0'; + format = pevent_find_event_by_name(session->pevent, + tracepoint, name); + if (format == NULL) { + /* + * Adding a handler for an event not in the session, + * just ignore it. + */ + goto next; + } + + evsel = perf_evlist__find_tracepoint_by_id(evlist, format->id); + if (evsel == NULL) + goto next; + + err = -EEXIST; + if (evsel->handler.func != NULL) + goto out_free; + evsel->handler.func = assocs[i].handler; +next: + free(tracepoint); + } + + err = 0; +out: + return err; + +out_free: + free(tracepoint); + goto out; +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index c71a1a7b05ed..7c435bde6eb0 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -33,6 +33,7 @@ struct perf_session { struct machine host_machine; struct rb_root machines; struct perf_evlist *evlist; + struct pevent *pevent; /* * FIXME: Need to split this up further, we need global * stats + per event stats. 'perf diff' also needs @@ -158,4 +159,13 @@ int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap); void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full); + +struct perf_evsel_str_handler; + +int __perf_session__set_tracepoints_handlers(struct perf_session *session, + const struct perf_evsel_str_handler *assocs, + size_t nr_assocs); + +#define perf_session__set_tracepoints_handlers(session, array) \ + __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array)) #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index df2fddbf0cd2..a51bd86f4d09 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -32,29 +32,25 @@ int header_page_size_size; int header_page_ts_size; int header_page_data_offset; -struct pevent *perf_pevent; -static struct pevent *pevent; - bool latency_format; -int read_trace_init(int file_bigendian, int host_bigendian) +struct pevent *read_trace_init(int file_bigendian, int host_bigendian) { - if (pevent) - return 0; - - perf_pevent = pevent_alloc(); - pevent = perf_pevent; + struct pevent *pevent = pevent_alloc(); - pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); - pevent_set_file_bigendian(pevent, file_bigendian); - pevent_set_host_bigendian(pevent, host_bigendian); + if (pevent != NULL) { + pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); + pevent_set_file_bigendian(pevent, file_bigendian); + pevent_set_host_bigendian(pevent, host_bigendian); + } - return 0; + return pevent; } static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) { + struct pevent *pevent = context->pevent; struct event_format *event; struct format_field *field; @@ -150,7 +146,7 @@ void *raw_field_ptr(struct event_format *event, const char *name, void *data) return data + field->offset; } -int trace_parse_common_type(void *data) +int trace_parse_common_type(struct pevent *pevent, void *data) { struct pevent_record record; @@ -158,7 +154,7 @@ int trace_parse_common_type(void *data) return pevent_data_type(pevent, &record); } -int trace_parse_common_pid(void *data) +int trace_parse_common_pid(struct pevent *pevent, void *data) { struct pevent_record record; @@ -166,27 +162,21 @@ int trace_parse_common_pid(void *data) return pevent_data_pid(pevent, &record); } -unsigned long long read_size(void *ptr, int size) +unsigned long long read_size(struct pevent *pevent, void *ptr, int size) { return pevent_read_number(pevent, ptr, size); } -struct event_format *trace_find_event(int type) -{ - return pevent_find_event(pevent, type); -} - - -void print_trace_event(int cpu, void *data, int size) +void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) { struct event_format *event; struct pevent_record record; struct trace_seq s; int type; - type = trace_parse_common_type(data); + type = trace_parse_common_type(pevent, data); - event = trace_find_event(type); + event = pevent_find_event(pevent, type); if (!event) { warning("ug! no event found for type %d", type); return; @@ -203,8 +193,8 @@ void print_trace_event(int cpu, void *data, int size) printf("\n"); } -void print_event(int cpu, void *data, int size, unsigned long long nsecs, - char *comm) +void print_event(struct pevent *pevent, int cpu, void *data, int size, + unsigned long long nsecs, char *comm) { struct pevent_record record; struct trace_seq s; @@ -227,7 +217,8 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, printf("\n"); } -void parse_proc_kallsyms(char *file, unsigned int size __unused) +void parse_proc_kallsyms(struct pevent *pevent, + char *file, unsigned int size __unused) { unsigned long long addr; char *func; @@ -258,7 +249,8 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused) } } -void parse_ftrace_printk(char *file, unsigned int size __unused) +void parse_ftrace_printk(struct pevent *pevent, + char *file, unsigned int size __unused) { unsigned long long addr; char *printk; @@ -282,17 +274,19 @@ void parse_ftrace_printk(char *file, unsigned int size __unused) } } -int parse_ftrace_file(char *buf, unsigned long size) +int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size) { return pevent_parse_event(pevent, buf, size, "ftrace"); } -int parse_event_file(char *buf, unsigned long size, char *sys) +int parse_event_file(struct pevent *pevent, + char *buf, unsigned long size, char *sys) { return pevent_parse_event(pevent, buf, size, sys); } -struct event_format *trace_find_next_event(struct event_format *event) +struct event_format *trace_find_next_event(struct pevent *pevent, + struct event_format *event) { static int idx; diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f097e0dd6c5c..719ed74a8565 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -114,20 +114,20 @@ static void skip(int size) }; } -static unsigned int read4(void) +static unsigned int read4(struct pevent *pevent) { unsigned int data; read_or_die(&data, 4); - return __data2host4(perf_pevent, data); + return __data2host4(pevent, data); } -static unsigned long long read8(void) +static unsigned long long read8(struct pevent *pevent) { unsigned long long data; read_or_die(&data, 8); - return __data2host8(perf_pevent, data); + return __data2host8(pevent, data); } static char *read_string(void) @@ -168,12 +168,12 @@ static char *read_string(void) return str; } -static void read_proc_kallsyms(void) +static void read_proc_kallsyms(struct pevent *pevent) { unsigned int size; char *buf; - size = read4(); + size = read4(pevent); if (!size) return; @@ -181,29 +181,29 @@ static void read_proc_kallsyms(void) read_or_die(buf, size); buf[size] = '\0'; - parse_proc_kallsyms(buf, size); + parse_proc_kallsyms(pevent, buf, size); free(buf); } -static void read_ftrace_printk(void) +static void read_ftrace_printk(struct pevent *pevent) { unsigned int size; char *buf; - size = read4(); + size = read4(pevent); if (!size) return; buf = malloc_or_die(size); read_or_die(buf, size); - parse_ftrace_printk(buf, size); + parse_ftrace_printk(pevent, buf, size); free(buf); } -static void read_header_files(void) +static void read_header_files(struct pevent *pevent) { unsigned long long size; char *header_event; @@ -214,7 +214,7 @@ static void read_header_files(void) if (memcmp(buf, "header_page", 12) != 0) die("did not read header page"); - size = read8(); + size = read8(pevent); skip(size); /* @@ -227,47 +227,48 @@ static void read_header_files(void) if (memcmp(buf, "header_event", 13) != 0) die("did not read header event"); - size = read8(); + size = read8(pevent); header_event = malloc_or_die(size); read_or_die(header_event, size); free(header_event); } -static void read_ftrace_file(unsigned long long size) +static void read_ftrace_file(struct pevent *pevent, unsigned long long size) { char *buf; buf = malloc_or_die(size); read_or_die(buf, size); - parse_ftrace_file(buf, size); + parse_ftrace_file(pevent, buf, size); free(buf); } -static void read_event_file(char *sys, unsigned long long size) +static void read_event_file(struct pevent *pevent, char *sys, + unsigned long long size) { char *buf; buf = malloc_or_die(size); read_or_die(buf, size); - parse_event_file(buf, size, sys); + parse_event_file(pevent, buf, size, sys); free(buf); } -static void read_ftrace_files(void) +static void read_ftrace_files(struct pevent *pevent) { unsigned long long size; int count; int i; - count = read4(); + count = read4(pevent); for (i = 0; i < count; i++) { - size = read8(); - read_ftrace_file(size); + size = read8(pevent); + read_ftrace_file(pevent, size); } } -static void read_event_files(void) +static void read_event_files(struct pevent *pevent) { unsigned long long size; char *sys; @@ -275,15 +276,15 @@ static void read_event_files(void) int count; int i,x; - systems = read4(); + systems = read4(pevent); for (i = 0; i < systems; i++) { sys = read_string(); - count = read4(); + count = read4(pevent); for (x=0; x < count; x++) { - size = read8(); - read_event_file(sys, size); + size = read8(pevent); + read_event_file(pevent, sys, size); } } } @@ -377,7 +378,7 @@ static int calc_index(void *ptr, int cpu) return (unsigned long)ptr - (unsigned long)cpu_data[cpu].page; } -struct pevent_record *trace_peek_data(int cpu) +struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu) { struct pevent_record *data; void *page = cpu_data[cpu].page; @@ -399,15 +400,15 @@ struct pevent_record *trace_peek_data(int cpu) /* FIXME: handle header page */ if (header_page_ts_size != 8) die("expected a long long type for timestamp"); - cpu_data[cpu].timestamp = data2host8(perf_pevent, ptr); + cpu_data[cpu].timestamp = data2host8(pevent, ptr); ptr += 8; switch (header_page_size_size) { case 4: - cpu_data[cpu].page_size = data2host4(perf_pevent, ptr); + cpu_data[cpu].page_size = data2host4(pevent, ptr); ptr += 4; break; case 8: - cpu_data[cpu].page_size = data2host8(perf_pevent, ptr); + cpu_data[cpu].page_size = data2host8(pevent, ptr); ptr += 8; break; default: @@ -421,10 +422,10 @@ read_again: if (idx >= cpu_data[cpu].page_size) { get_next_page(cpu); - return trace_peek_data(cpu); + return trace_peek_data(pevent, cpu); } - type_len_ts = data2host4(perf_pevent, ptr); + type_len_ts = data2host4(pevent, ptr); ptr += 4; type_len = type_len4host(type_len_ts); @@ -434,14 +435,14 @@ read_again: case RINGBUF_TYPE_PADDING: if (!delta) die("error, hit unexpected end of page"); - length = data2host4(perf_pevent, ptr); + length = data2host4(pevent, ptr); ptr += 4; length *= 4; ptr += length; goto read_again; case RINGBUF_TYPE_TIME_EXTEND: - extend = data2host4(perf_pevent, ptr); + extend = data2host4(pevent, ptr); ptr += 4; extend <<= TS_SHIFT; extend += delta; @@ -452,7 +453,7 @@ read_again: ptr += 12; break; case 0: - length = data2host4(perf_pevent, ptr); + length = data2host4(pevent, ptr); ptr += 4; die("here! length=%d", length); break; @@ -477,17 +478,17 @@ read_again: return data; } -struct pevent_record *trace_read_data(int cpu) +struct pevent_record *trace_read_data(struct pevent *pevent, int cpu) { struct pevent_record *data; - data = trace_peek_data(cpu); + data = trace_peek_data(pevent, cpu); cpu_data[cpu].next = NULL; return data; } -ssize_t trace_report(int fd, bool __repipe) +ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe) { char buf[BUFSIZ]; char test[] = { 23, 8, 68 }; @@ -519,30 +520,32 @@ ssize_t trace_report(int fd, bool __repipe) file_bigendian = buf[0]; host_bigendian = bigendian(); - read_trace_init(file_bigendian, host_bigendian); + *ppevent = read_trace_init(file_bigendian, host_bigendian); + if (*ppevent == NULL) + die("read_trace_init failed"); read_or_die(buf, 1); long_size = buf[0]; - page_size = read4(); + page_size = read4(*ppevent); - read_header_files(); + read_header_files(*ppevent); - read_ftrace_files(); - read_event_files(); - read_proc_kallsyms(); - read_ftrace_printk(); + read_ftrace_files(*ppevent); + read_event_files(*ppevent); + read_proc_kallsyms(*ppevent); + read_ftrace_printk(*ppevent); size = calc_data_size - 1; calc_data_size = 0; repipe = false; if (show_funcs) { - pevent_print_funcs(perf_pevent); + pevent_print_funcs(*ppevent); return size; } if (show_printk) { - pevent_print_printk(perf_pevent); + pevent_print_printk(*ppevent); return size; } diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 18ae6c1831d3..474aa7a7df43 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -36,6 +36,7 @@ static int stop_script_unsupported(void) } static void process_event_unsupported(union perf_event *event __unused, + struct pevent *pevent __unused, struct perf_sample *sample __unused, struct perf_evsel *evsel __unused, struct machine *machine __unused, @@ -61,7 +62,8 @@ static int python_start_script_unsupported(const char *script __unused, return -1; } -static int python_generate_script_unsupported(const char *outfile __unused) +static int python_generate_script_unsupported(struct pevent *pevent __unused, + const char *outfile __unused) { print_python_unsupported_msg(); @@ -122,7 +124,8 @@ static int perl_start_script_unsupported(const char *script __unused, return -1; } -static int perl_generate_script_unsupported(const char *outfile __unused) +static int perl_generate_script_unsupported(struct pevent *pevent __unused, + const char *outfile __unused) { print_perl_unsupported_msg(); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 639852ac1117..8fef1d6687b7 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -8,6 +8,7 @@ struct machine; struct perf_sample; union perf_event; +struct perf_tool; struct thread; extern int header_page_size_size; @@ -29,35 +30,36 @@ enum { int bigendian(void); -int read_trace_init(int file_bigendian, int host_bigendian); -void print_trace_event(int cpu, void *data, int size); +struct pevent *read_trace_init(int file_bigendian, int host_bigendian); +void print_trace_event(struct pevent *pevent, int cpu, void *data, int size); -void print_event(int cpu, void *data, int size, unsigned long long nsecs, - char *comm); +void print_event(struct pevent *pevent, int cpu, void *data, int size, + unsigned long long nsecs, char *comm); -int parse_ftrace_file(char *buf, unsigned long size); -int parse_event_file(char *buf, unsigned long size, char *sys); +int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size); +int parse_event_file(struct pevent *pevent, + char *buf, unsigned long size, char *sys); -struct pevent_record *trace_peek_data(int cpu); -struct event_format *trace_find_event(int type); +struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu); unsigned long long raw_field_value(struct event_format *event, const char *name, void *data); void *raw_field_ptr(struct event_format *event, const char *name, void *data); -void parse_proc_kallsyms(char *file, unsigned int size __unused); -void parse_ftrace_printk(char *file, unsigned int size __unused); +void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size); +void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size); -ssize_t trace_report(int fd, bool repipe); +ssize_t trace_report(int fd, struct pevent **pevent, bool repipe); -int trace_parse_common_type(void *data); -int trace_parse_common_pid(void *data); +int trace_parse_common_type(struct pevent *pevent, void *data); +int trace_parse_common_pid(struct pevent *pevent, void *data); -struct event_format *trace_find_next_event(struct event_format *event); -unsigned long long read_size(void *ptr, int size); +struct event_format *trace_find_next_event(struct pevent *pevent, + struct event_format *event); +unsigned long long read_size(struct pevent *pevent, void *ptr, int size); unsigned long long eval_flag(const char *flag); -struct pevent_record *trace_read_data(int cpu); +struct pevent_record *trace_read_data(struct pevent *pevent, int cpu); int read_tracing_data(int fd, struct list_head *pattrs); struct tracing_data { @@ -77,11 +79,12 @@ struct scripting_ops { int (*start_script) (const char *script, int argc, const char **argv); int (*stop_script) (void); void (*process_event) (union perf_event *event, + struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, struct thread *thread); - int (*generate_script) (const char *outfile); + int (*generate_script) (struct pevent *pevent, const char *outfile); }; int script_spec_register(const char *spec, struct scripting_ops *ops); @@ -90,6 +93,7 @@ void setup_perl_scripting(void); void setup_python_scripting(void); struct scripting_context { + struct pevent *pevent; void *event_data; }; -- cgit v1.2.3 From 209bd9e3e14712d74c8bebb028afda905d689f1c Mon Sep 17 00:00:00 2001 From: "Pierre-Loup A. Griffais" Date: Fri, 22 Jun 2012 11:38:13 -0700 Subject: perf symbols: Follow .gnu_debuglink section to find separate symbols The .gnu_debuglink section is specified to contain the filename of the debug info file, as well as a CRC that can be used to validate it. This doesn't currently use the checksum and relies on the usual build-id matching for validation. This provides more context: http://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html Signed-off-by: Pierre-Loup A. Griffais Reported-by: Mike Sartain Tested-by: Mike Sartain Cc: Ingo Molnar Cc: Linus Torvalds Cc: Mike Sartain Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/4FE4BB95.3080309@nvidia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/symbol.h | 1 + 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 3e2e5ea0f03f..994f4ffdcd05 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1590,11 +1590,62 @@ out: return err; } +static int filename__read_debuglink(const char *filename, + char *debuglink, size_t size) +{ + int fd, err = -1; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *data; + Elf_Scn *sec; + Elf_Kind ek; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); + goto out_close; + } + + ek = elf_kind(elf); + if (ek != ELF_K_ELF) + goto out_close; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_err("%s: cannot get elf header.\n", __func__); + goto out_close; + } + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".gnu_debuglink", NULL); + if (sec == NULL) + goto out_close; + + data = elf_getdata(sec, NULL); + if (data == NULL) + goto out_close; + + /* the start of this section is a zero-terminated string */ + strncpy(debuglink, data->d_buf, size); + + elf_end(elf); + +out_close: + close(fd); +out: + return err; +} + char dso__symtab_origin(const struct dso *dso) { static const char origin[] = { [SYMTAB__KALLSYMS] = 'k', [SYMTAB__JAVA_JIT] = 'j', + [SYMTAB__DEBUGLINK] = 'l', [SYMTAB__BUILD_ID_CACHE] = 'B', [SYMTAB__FEDORA_DEBUGINFO] = 'f', [SYMTAB__UBUNTU_DEBUGINFO] = 'u', @@ -1662,10 +1713,22 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) */ want_symtab = 1; restart: - for (dso->symtab_type = SYMTAB__BUILD_ID_CACHE; + for (dso->symtab_type = SYMTAB__DEBUGLINK; dso->symtab_type != SYMTAB__NOT_FOUND; dso->symtab_type++) { switch (dso->symtab_type) { + case SYMTAB__DEBUGLINK: { + char *debuglink; + strncpy(name, dso->long_name, size); + debuglink = name + dso->long_name_len; + while (debuglink != name && *debuglink != '/') + debuglink--; + if (*debuglink == '/') + debuglink++; + filename__read_debuglink(dso->long_name, debuglink, + size - (debuglink - name)); + } + break; case SYMTAB__BUILD_ID_CACHE: /* skip the locally configured cache if a symfs is given */ if (symbol_conf.symfs[0] || diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index af0752b1aca1..a884b99017f0 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -257,6 +257,7 @@ enum symtab_type { SYMTAB__KALLSYMS = 0, SYMTAB__GUEST_KALLSYMS, SYMTAB__JAVA_JIT, + SYMTAB__DEBUGLINK, SYMTAB__BUILD_ID_CACHE, SYMTAB__FEDORA_DEBUGINFO, SYMTAB__UBUNTU_DEBUGINFO, -- cgit v1.2.3 From 08942f6d5d992e9486b07653fd87ea8182a22fa0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 20 Jun 2012 15:08:06 +0900 Subject: perf bench: Documentation update The current perf-bench documentation has a couple of typos and even lacks entire description of mem subsystem. Fix it. Reported-by: Ingo Molnar Signed-off-by: Namhyung Kim Acked-by: Hitoshi Mitake Cc: Hitoshi Mitake Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1340172486-17805-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-bench.txt | 78 +++++++++++++++++++++++++++++++-- tools/perf/bench/mem-memcpy.c | 4 +- tools/perf/bench/mem-memset.c | 8 ++-- tools/perf/builtin-bench.c | 4 +- 4 files changed, 83 insertions(+), 11 deletions(-) diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index a3dbadb26ef5..f3c716a4cad3 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -12,7 +12,7 @@ SYNOPSIS DESCRIPTION ----------- -This 'perf bench' command is general framework for benchmark suites. +This 'perf bench' command is a general framework for benchmark suites. COMMON OPTIONS -------------- @@ -45,14 +45,20 @@ SUBSYSTEM 'sched':: Scheduler and IPC mechanisms. +'mem':: + Memory access performance. + +'all':: + All benchmark subsystems. + SUITES FOR 'sched' ~~~~~~~~~~~~~~~~~~ *messaging*:: Suite for evaluating performance of scheduler and IPC mechanisms. Based on hackbench by Rusty Russell. -Options of *pipe* -^^^^^^^^^^^^^^^^^ +Options of *messaging* +^^^^^^^^^^^^^^^^^^^^^^ -p:: --pipe:: Use pipe() instead of socketpair() @@ -115,6 +121,72 @@ Example of *pipe* 59004 ops/sec --------------------- +SUITES FOR 'mem' +~~~~~~~~~~~~~~~~ +*memcpy*:: +Suite for evaluating performance of simple memory copy in various ways. + +Options of *memcpy* +^^^^^^^^^^^^^^^^^^^ +-l:: +--length:: +Specify length of memory to copy (default: 1MB). +Available units are B, KB, MB, GB and TB (case insensitive). + +-r:: +--routine:: +Specify routine to copy (default: default). +Available routines are depend on the architecture. +On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported. + +-i:: +--iterations:: +Repeat memcpy invocation this number of times. + +-c:: +--clock:: +Use perf's cpu-cycles event instead of gettimeofday syscall. + +-o:: +--only-prefault:: +Show only the result with page faults before memcpy. + +-n:: +--no-prefault:: +Show only the result without page faults before memcpy. + +*memset*:: +Suite for evaluating performance of simple memory set in various ways. + +Options of *memset* +^^^^^^^^^^^^^^^^^^^ +-l:: +--length:: +Specify length of memory to set (default: 1MB). +Available units are B, KB, MB, GB and TB (case insensitive). + +-r:: +--routine:: +Specify routine to set (default: default). +Available routines are depend on the architecture. +On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported. + +-i:: +--iterations:: +Repeat memset invocation this number of times. + +-c:: +--clock:: +Use perf's cpu-cycles event instead of gettimeofday syscall. + +-o:: +--only-prefault:: +Show only the result with page faults before memset. + +-n:: +--no-prefault:: +Show only the result without page faults before memset. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 71557225bf92..d990365cafa0 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -32,13 +32,13 @@ static bool no_prefault; static const struct option options[] = { OPT_STRING('l', "length", &length_str, "1MB", "Specify length of memory to copy. " - "available unit: B, MB, GB (upper and lower)"), + "Available units: B, KB, MB, GB and TB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", "Specify routine to copy"), OPT_INTEGER('i', "iterations", &iterations, "repeat memcpy() invocation this number of times"), OPT_BOOLEAN('c', "clock", &use_clock, - "Use CPU clock for measuring"), + "Use cycles event instead of gettimeofday() for measuring"), OPT_BOOLEAN('o', "only-prefault", &only_prefault, "Show only the result with page faults before memcpy()"), OPT_BOOLEAN('n', "no-prefault", &no_prefault, diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index e9079185bd72..bf0d5f552017 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -31,14 +31,14 @@ static bool no_prefault; static const struct option options[] = { OPT_STRING('l', "length", &length_str, "1MB", - "Specify length of memory to copy. " - "available unit: B, MB, GB (upper and lower)"), + "Specify length of memory to set. " + "Available units: B, KB, MB, GB and TB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", - "Specify routine to copy"), + "Specify routine to set"), OPT_INTEGER('i', "iterations", &iterations, "repeat memset() invocation this number of times"), OPT_BOOLEAN('c', "clock", &use_clock, - "Use CPU clock for measuring"), + "Use cycles event instead of gettimeofday() for measuring"), OPT_BOOLEAN('o', "only-prefault", &only_prefault, "Show only the result with page faults before memset()"), OPT_BOOLEAN('n', "no-prefault", &no_prefault, diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index b0e74ab2d7a2..1f3100216448 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -33,7 +33,7 @@ struct bench_suite { }; \ /* sentinel: easy for help */ -#define suite_all { "all", "test all suite (pseudo suite)", NULL } +#define suite_all { "all", "Test all benchmark suites", NULL } static struct bench_suite sched_suites[] = { { "messaging", @@ -75,7 +75,7 @@ static struct bench_subsys subsystems[] = { "memory access performance", mem_suites }, { "all", /* sentinel: easy for help */ - "test all subsystem (pseudo subsystem)", + "all benchmark subsystem", NULL }, { NULL, NULL, -- cgit v1.2.3 From 300aa941650e98966ad85847527537df5b11a87e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 11 Jun 2012 13:48:41 -0600 Subject: perf report: Delay sample_type checks in pipe mode The pipeline: perf record -a -g -o - sleep 5 |perf inject -v -b | perf report -g -i - generates the warning: Selected -g but no callchain data. Did you call 'perf record' without -g? The problem is that the header data is not written to the pipe, so the sample_type has not been available when perf_report__setup_sample_type is called. For pipe mode, record dumps the sample type as part of the synthesized events stream -- perf_event__synthesize_attrs(). Handle this be detecting pipe mode and not doing early sanity checks on sample_type. Signed-off-by: David Ahern Tested-by: Tim Chen Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Link: http://lkml.kernel.org/r/1339444121-26236-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 40b0ffc3ad3b..69b1c1185159 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -245,11 +245,12 @@ static int process_read_event(struct perf_tool *tool, return 0; } +/* For pipe mode, sample_type is not currently set */ static int perf_report__setup_sample_type(struct perf_report *rep) { struct perf_session *self = rep->session; - if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) { + if (!self->fd_pipe && !(self->sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { ui__error("Selected --sort parent, but no " "callchain data. Did you call " @@ -272,7 +273,8 @@ static int perf_report__setup_sample_type(struct perf_report *rep) } if (sort__branch_mode == 1) { - if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { + if (!self->fd_pipe && + !(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { ui__error("Selected -b but no branch data. " "Did you call perf record without -b?\n"); return -1; -- cgit v1.2.3 From d9873ab79376d5c0112ed09e14783067dc65e808 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 11 Jun 2012 19:13:32 -0600 Subject: perf tools: Trivial build fix References to OUTPUT should not be followed by a '/'. When a build output directory is not specified for this case you get: gcc -o builtin-annotate.o -c ... -I/util ... which is wrong. Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Link: http://lkml.kernel.org/r/1339463612-30937-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d698c118a602..75d74e5db8d5 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -155,7 +155,7 @@ endif ### --- END CONFIGURATION SECTION --- -BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)/util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE +BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE BASIC_LDFLAGS = # Guard against environment variables -- cgit v1.2.3 From 0be61ebc18b919dddbdbcd1c4f42513c310ecf59 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 18 Jun 2012 09:28:16 -0400 Subject: tracing/selftest: Add a WARN_ON() if a tracer test fails Add a WARN_ON() output on test failures so that they are easier to detect in automated tests. Although, the WARN_ON() will not print if the test causes the system to crash, obviously. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 49249c28690d..748f6401edf6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -830,6 +830,8 @@ int register_tracer(struct tracer *type) current_trace = saved_tracer; if (ret) { printk(KERN_CONT "FAILED!\n"); + /* Add the warning after printing 'FAILED' */ + WARN_ON(1); goto out; } /* Only reset on passing, to avoid touching corrupted buffers */ -- cgit v1.2.3 From b102f1d0f1cd0bb5ec82e5aeb1e33502d6ad6710 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Jun 2012 09:41:39 +0900 Subject: tracing/kvm: Use __print_hex() for kvm_emulate_insn tracepoint The kvm_emulate_insn tracepoint used __print_insn() for printing its instructions. However it makes the format of the event hard to parse as it reveals TP internals. Fortunately, kernel provides __print_hex for almost same purpose, we can use it instead of open coding it. The user-space can be changed to parse it later. That means raw kernel tracing will not be affected by this change: # cd /sys/kernel/debug/tracing/ # cat events/kvm/kvm_emulate_insn/format name: kvm_emulate_insn ID: 29 format: ... print fmt: "%x:%llx:%s (%s)%s", REC->csbase, REC->rip, __print_hex(REC->insn, REC->len), \ __print_symbolic(REC->flags, { 0, "real" }, { (1 << 0) | (1 << 1), "vm16" }, \ { (1 << 0), "prot16" }, { (1 << 0) | (1 << 2), "prot32" }, { (1 << 0) | (1 << 3), "prot64" }), \ REC->failed ? " failed" : "" # echo 1 > events/kvm/kvm_emulate_insn/enable # cat trace # tracer: nop # # entries-in-buffer/entries-written: 2183/2183 #P:12 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | qemu-kvm-1782 [002] ...1 140.931636: kvm_emulate_insn: 0:c102fa25:89 10 (prot32) qemu-kvm-1781 [004] ...1 140.931637: kvm_emulate_insn: 0:c102fa25:89 10 (prot32) Link: http://lkml.kernel.org/n/tip-wfw6y3b9ugtey8snaow9nmg5@git.kernel.org Link: http://lkml.kernel.org/r/1340757701-10711-2-git-send-email-namhyung@kernel.org Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Namhyung Kim Cc: kvm@vger.kernel.org Acked-by: Avi Kivity Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- arch/x86/kvm/trace.h | 12 +----------- include/trace/ftrace.h | 1 + 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 911d2641f14c..62d02e3c3ed6 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -710,16 +710,6 @@ TRACE_EVENT(kvm_skinit, __entry->rip, __entry->slb) ); -#define __print_insn(insn, ilen) ({ \ - int i; \ - const char *ret = p->buffer + p->len; \ - \ - for (i = 0; i < ilen; ++i) \ - trace_seq_printf(p, " %02x", insn[i]); \ - trace_seq_printf(p, "%c", 0); \ - ret; \ - }) - #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) #define KVM_EMUL_INSN_F_CS_D (1 << 2) @@ -786,7 +776,7 @@ TRACE_EVENT(kvm_emulate_insn, TP_printk("%x:%llx:%s (%s)%s", __entry->csbase, __entry->rip, - __print_insn(__entry->insn, __entry->len), + __print_hex(__entry->insn, __entry->len), __print_symbolic(__entry->flags, kvm_trace_symbol_emul_flags), __entry->failed ? " failed" : "" diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 769724944fc6..c6bc2faaf261 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -571,6 +571,7 @@ static inline void ftrace_test_probe_##call(void) \ #undef __print_flags #undef __print_symbolic +#undef __print_hex #undef __get_dynamic_array #undef __get_str -- cgit v1.2.3 From 6d158a813efcd09661c23f16ddf7e2ff834cb20c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 27 Jun 2012 20:46:14 -0400 Subject: tracing: Remove NR_CPUS array from trace_iterator Replace the NR_CPUS array of buffer_iter from the trace_iterator with an allocated array. This will just create an array of possible CPUS instead of the max number specified. The use of NR_CPUS in that array caused allocation failures for machines that were tight on memory. This did not cause any failures to the system itself (no crashes), but caused unnecessary failures for reading the trace files. Added a helper function called 'trace_buffer_iter()' that returns the buffer_iter item or NULL if it is not defined or the array was not allocated. Some routines do not require the array (tracing_open_pipe() for one). Reported-by: Dave Jones Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- kernel/trace/trace.c | 27 ++++++++++++++++++--------- kernel/trace/trace.h | 8 ++++++++ kernel/trace/trace_functions_graph.c | 2 +- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 1aff18346c71..af961d6f7ab1 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -65,7 +65,7 @@ struct trace_iterator { void *private; int cpu_file; struct mutex mutex; - struct ring_buffer_iter *buffer_iter[NR_CPUS]; + struct ring_buffer_iter **buffer_iter; unsigned long iter_flags; /* trace_seq for __print_flags() and __print_symbolic() etc. */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 748f6401edf6..b2af14e94c28 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1710,9 +1710,11 @@ EXPORT_SYMBOL_GPL(trace_vprintk); static void trace_iterator_increment(struct trace_iterator *iter) { + struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); + iter->idx++; - if (iter->buffer_iter[iter->cpu]) - ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); + if (buf_iter) + ring_buffer_read(buf_iter, NULL); } static struct trace_entry * @@ -1720,7 +1722,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events) { struct ring_buffer_event *event; - struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; + struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); if (buf_iter) event = ring_buffer_iter_peek(buf_iter, ts); @@ -1858,10 +1860,10 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) tr->data[cpu]->skipped_entries = 0; - if (!iter->buffer_iter[cpu]) + buf_iter = trace_buffer_iter(iter, cpu); + if (!buf_iter) return; - buf_iter = iter->buffer_iter[cpu]; ring_buffer_iter_reset(buf_iter); /* @@ -2207,13 +2209,15 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) int trace_empty(struct trace_iterator *iter) { + struct ring_buffer_iter *buf_iter; int cpu; /* If we are looking at one CPU buffer, only check that one */ if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { cpu = iter->cpu_file; - if (iter->buffer_iter[cpu]) { - if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) + buf_iter = trace_buffer_iter(iter, cpu); + if (buf_iter) { + if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) @@ -2223,8 +2227,9 @@ int trace_empty(struct trace_iterator *iter) } for_each_tracing_cpu(cpu) { - if (iter->buffer_iter[cpu]) { - if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) + buf_iter = trace_buffer_iter(iter, cpu); + if (buf_iter) { + if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) @@ -2383,6 +2388,8 @@ __tracing_open(struct inode *inode, struct file *file) if (!iter) return ERR_PTR(-ENOMEM); + iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(), + GFP_KERNEL); /* * We make a copy of the current tracer to avoid concurrent * changes on it while we are reading. @@ -2443,6 +2450,7 @@ __tracing_open(struct inode *inode, struct file *file) fail: mutex_unlock(&trace_types_lock); kfree(iter->trace); + kfree(iter->buffer_iter); seq_release_private(inode, file); return ERR_PTR(-ENOMEM); } @@ -2483,6 +2491,7 @@ static int tracing_release(struct inode *inode, struct file *file) mutex_destroy(&iter->mutex); free_cpumask_var(iter->started); kfree(iter->trace); + kfree(iter->buffer_iter); seq_release_private(inode, file); return 0; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5aec220d2de0..55e1f7f0db12 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -317,6 +317,14 @@ struct tracer { #define TRACE_PIPE_ALL_CPU -1 +static inline struct ring_buffer_iter * +trace_buffer_iter(struct trace_iterator *iter, int cpu) +{ + if (iter->buffer_iter && iter->buffer_iter[cpu]) + return iter->buffer_iter[cpu]; + return NULL; +} + int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void trace_wake_up(void); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a7d2a4c653d8..ce27c8ba8d31 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -538,7 +538,7 @@ get_return_for_leaf(struct trace_iterator *iter, next = &data->ret; } else { - ring_iter = iter->buffer_iter[iter->cpu]; + ring_iter = trace_buffer_iter(iter, iter->cpu); /* First peek to compare current entry and the next one */ if (ring_iter) -- cgit v1.2.3 From a5fb833172eca69136e9ee1ada778e404086ab8a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 28 Jun 2012 13:35:04 -0400 Subject: ring-buffer: Fix uninitialized read_stamp The ring buffer reader page is used to swap a page from the writable ring buffer. If the writer happens to be on that page, it ends up on the reader page, but will simply move off of it, back into the writable ring buffer as writes are added. The time stamp passed back to the readers is stored in the cpu_buffer per CPU descriptor. This stamp is updated when a swap of the reader page takes place, and it reads the current stamp from the page taken from the writable ring buffer. Everytime a writer goes to a new page, it updates the time stamp of that page. The problem happens if a reader reads a page from an empty per CPU ring buffer. If the buffer is empty, the swap still takes place, placing the writer at the start of the reader page. If at a later time, a write happens, it updates the page's time stamp and continues. But the problem is that the read_stamp does not get updated, because the page was already swapped. The solution to this was to not swap the page if the ring buffer happens to be empty. This also removes the side effect that the writes on the reader page will not get updated because the writer never gets back on the reader page without a swap. That is, if a read happens on an empty buffer, but then no reads happen for a while. If a swap took place, and the writer were to start writing a lot of data (function tracer), it will start overflowing the ring buffer and overwrite the older data. But because the writer never goes back onto the reader page, the data left on the reader page never gets overwritten. This causes the reader to see really old data, followed by a jump to newer data. Link: http://lkml.kernel.org/r/1340060577-9112-1-git-send-email-dhsharp@google.com Google-Bug-Id: 6410455 Reported-by: David Sharp tested-by: David Sharp Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1d0f6a8a0e5e..82a3e0c56b1d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3239,6 +3239,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) if (cpu_buffer->commit_page == cpu_buffer->reader_page) goto out; + /* Don't bother swapping if the ring buffer is empty */ + if (rb_num_of_entries(cpu_buffer) == 0) + goto out; + /* * Reset the reader page to size zero. */ -- cgit v1.2.3 From f526a4ce2643e2636c091cf2bf0ec1442110c5b7 Mon Sep 17 00:00:00 2001 From: Konstantin Stepanyuk Date: Wed, 27 Jun 2012 15:52:14 -0600 Subject: tools lib traceevent: Fix clean target in Makefile Dependency files were not cleaned up. Add missing space to fix the issue. Signed-off-by: Konstantin Stepanyuk Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1340833934-18783-1-git-send-email-konstantin.stepanyuk@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 3d69aa9ff51e..423f4b81ecce 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -290,7 +290,7 @@ install_lib: all_cmd install_plugins install_python install: install_lib clean: - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES).*.d + $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d $(RM) tags TAGS endif # skip-makefile -- cgit v1.2.3 From 6545e3a8f0666b60b26202a5271a94f7cd9601a8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 22 Jun 2012 17:10:14 +0900 Subject: tools lib traceevent: Teach [ce]tags about libtraceeevent error codes As we use a macro trick to sync each error codes with its description string, teach [ce]tags to process them properly. This patch modifies the libtraceevent's Makefile not a kernel one. Suggested-by: Steven Rostedt Signed-off-by: Namhyung Kim Link: http://lkml.kernel.org/n/tip-3101nsbg52glxdqih291qj74@git.kernel.org Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1340352615-20737-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 423f4b81ecce..34a577e7d554 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -270,11 +270,13 @@ endif tags: force $(RM) tags - find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px + find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ + --regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/' TAGS: force $(RM) TAGS - find . -name '*.[ch]' | xargs etags + find . -name '*.[ch]' | xargs etags \ + --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' define do_install $(print_install) \ -- cgit v1.2.3 From 860df5833e461beba4bf9f3304a7919da98fd789 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 22 Jun 2012 14:37:36 +0900 Subject: tools lib traceevent: Make dependency files regeneratable Ingo reported that libtraceevent doesn't clean out dependency (.d) files and it can cause a build error when the libgcc package upgraded: comet:~/tip/tools/perf> make -j SUBDIR ../lib/traceevent/ make[1]: *** No rule to make target `/usr/lib/gcc/x86_64-redhat-linux/4.7.0/include/stddef.h', needed by `event-parse.o'. Stop. make: *** [../lib/traceevent//libtraceevent.a] Error 2 So this patch makes the .d files depends on the source and header files also, so that it can be re-generated as needed. NOTE: This code is copied from the GNU make manual page (4.14 Generating Prerequisites Automatically). Reported-by: Ingo Molnar Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1340343462-15556-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 34a577e7d554..46c2f6b7b123 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -250,8 +250,12 @@ endef all_objs := $(sort $(ALL_OBJS)) all_deps := $(all_objs:%.o=.%.d) +# let .d file also depends on the source and header files define check_deps - $(CC) -M $(CFLAGS) $< > $@; + @set -e; $(RM) $@; \ + $(CC) -M $(CFLAGS) $< > $@.$$$$; \ + sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ + $(RM) $@.$$$$ endef $(gui_deps): ks_version.h -- cgit v1.2.3 From 600da3cfe19496485c5d8d52ff703590a0bd53f6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 22 Jun 2012 17:10:15 +0900 Subject: tools lib traceevent: Check string is really printable When libtraceevent parses format fields, it assumes that array of 1 byte is string but it's not always true. The kvm_emulate_insn contains 15 u8 array of insn that contains (binary) instructions. Thus when it's printed, it'll have broken output like below: kvm_emulate_insn: [FAILED TO PARSE] rip=3238197797 csbase=0 len=2 \ insn=<89>P^]& flags=5 failed=0 With this patch: kvm_emulate_insn: [FAILED TO PARSE] rip=3238197797 csbase=0 len=2 \ insn=ARRAY[89, 10, 5d, c3, 8d, b4, 26, 00, 00, 00, 00, 55, 89, e5, 3e] flags=5 failed=0 Suggested-by: Steven Rostedt Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1340352615-20737-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 554828219c33..b203a50c0f22 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3589,6 +3589,16 @@ static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); } +static int is_printable_array(char *p, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < len && p[i]; i++) + if (!isprint(p[i])) + return 0; + return 1; +} + static void print_event_fields(struct trace_seq *s, void *data, int size, struct event_format *event) { @@ -3608,7 +3618,8 @@ static void print_event_fields(struct trace_seq *s, void *data, int size, len = offset >> 16; offset &= 0xffff; } - if (field->flags & FIELD_IS_STRING) { + if (field->flags & FIELD_IS_STRING && + is_printable_array(data + offset, len)) { trace_seq_printf(s, "%s", (char *)data + offset); } else { trace_seq_puts(s, "ARRAY["); @@ -3619,6 +3630,7 @@ static void print_event_fields(struct trace_seq *s, void *data, int size, *((unsigned char *)data + offset + i)); } trace_seq_putc(s, ']'); + field->flags &= ~FIELD_IS_STRING; } } else { val = pevent_read_number(event->pevent, data + field->offset, -- cgit v1.2.3 From b700807196ac8d87e00fed9fda80ab89b7f56db6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Jun 2012 09:41:40 +0900 Subject: tools lib traceevent: Use local variable 'field' Use local variable 'field' to reduce typing. It is needed by later patch not to exceed 80 column. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1340757701-10711-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index b203a50c0f22..63d02be5480f 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3214,6 +3214,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, { struct pevent *pevent = event->pevent; struct print_flag_sym *flag; + struct format_field *field; unsigned long long val, fval; unsigned long addr; char *str; @@ -3228,27 +3229,29 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, print_str_to_seq(s, format, len_arg, arg->atom.atom); return; case PRINT_FIELD: - if (!arg->field.field) { - arg->field.field = pevent_find_any_field(event, arg->field.name); - if (!arg->field.field) + field = arg->field.field; + if (!field) { + field = pevent_find_any_field(event, arg->field.name); + if (!field) die("field %s not found", arg->field.name); + arg->field.field = field; } /* Zero sized fields, mean the rest of the data */ - len = arg->field.field->size ? : size - arg->field.field->offset; + len = field->size ? : size - field->offset; /* * Some events pass in pointers. If this is not an array * and the size is the same as long_size, assume that it * is a pointer. */ - if (!(arg->field.field->flags & FIELD_IS_ARRAY) && - arg->field.field->size == pevent->long_size) { - addr = *(unsigned long *)(data + arg->field.field->offset); + if (!(field->flags & FIELD_IS_ARRAY) && + field->size == pevent->long_size) { + addr = *(unsigned long *)(data + field->offset); trace_seq_printf(s, "%lx", addr); break; } str = malloc_or_die(len + 1); - memcpy(str, data + arg->field.field->offset, len); + memcpy(str, data + field->offset, len); str[len] = 0; print_str_to_seq(s, format, len_arg, str); free(str); -- cgit v1.2.3 From e080e6f1c863242ff709046d0486d09c46dc484a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Jun 2012 09:41:41 +0900 Subject: tools lib traceevent: Add support for __print_hex() Since the __print_hex() function is used in print fmt now, add corresponding parser routines. This makes the output of perf script on the kvm_emulate_insn event not to fail any more. before: kvm_emulate_insn: [FAILED TO PARSE] rip=3238197797 ... after: kvm_emulate_insn: 0:c102fa25:89 10 (prot32) Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1340757701-10711-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 75 +++++++++++++++++++++- tools/lib/traceevent/event-parse.h | 7 ++ .../perf/util/scripting-engines/trace-event-perl.c | 4 ++ .../util/scripting-engines/trace-event-python.c | 4 ++ 4 files changed, 89 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 63d02be5480f..b1abd39923d6 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -697,6 +697,10 @@ static void free_arg(struct print_arg *arg) free_arg(arg->symbol.field); free_flag_sym(arg->symbol.symbols); break; + case PRINT_HEX: + free_arg(arg->hex.field); + free_arg(arg->hex.size); + break; case PRINT_TYPE: free(arg->typecast.type); free_arg(arg->typecast.item); @@ -2259,6 +2263,45 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok) return EVENT_ERROR; } +static enum event_type +process_hex(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct print_arg *field; + enum event_type type; + char *token; + + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_HEX; + + field = alloc_arg(); + type = process_arg(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto out_free; + + arg->hex.field = field; + + free_token(token); + + field = alloc_arg(); + type = process_arg(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, ")")) + goto out_free; + + arg->hex.size = field; + + free_token(token); + type = read_token_item(tok); + return type; + + out_free: + free_arg(field); + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + static enum event_type process_dynamic_array(struct event_format *event, struct print_arg *arg, char **tok) { @@ -2488,6 +2531,10 @@ process_function(struct event_format *event, struct print_arg *arg, is_symbolic_field = 1; return process_symbols(event, arg, tok); } + if (strcmp(token, "__print_hex") == 0) { + free_token(token); + return process_hex(event, arg, tok); + } if (strcmp(token, "__get_str") == 0) { free_token(token); return process_str(event, arg, tok); @@ -2995,6 +3042,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg break; case PRINT_FLAGS: case PRINT_SYMBOL: + case PRINT_HEX: break; case PRINT_TYPE: val = eval_num_arg(data, size, event, arg->typecast.item); @@ -3218,8 +3266,9 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, unsigned long long val, fval; unsigned long addr; char *str; + unsigned char *hex; int print; - int len; + int i, len; switch (arg->type) { case PRINT_NULL: @@ -3284,6 +3333,23 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, } } break; + case PRINT_HEX: + field = arg->hex.field->field.field; + if (!field) { + str = arg->hex.field->field.name; + field = pevent_find_any_field(event, str); + if (!field) + die("field %s not found", str); + arg->hex.field->field.field = field; + } + hex = data + field->offset; + len = eval_num_arg(data, size, event, arg->hex.size); + for (i = 0; i < len; i++) { + if (i) + trace_seq_putc(s, ' '); + trace_seq_printf(s, "%02x", hex[i]); + } + break; case PRINT_TYPE: break; @@ -4294,6 +4360,13 @@ static void print_args(struct print_arg *args) trace_seq_destroy(&s); printf(")"); break; + case PRINT_HEX: + printf("__print_hex("); + print_args(args->hex.field); + printf(", "); + print_args(args->hex.size); + printf(")"); + break; case PRINT_STRING: case PRINT_BSTRING: printf("__get_str(%s)", args->string.string); diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index ac997bc7b592..5772ad8cb386 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -226,6 +226,11 @@ struct print_arg_symbol { struct print_flag_sym *symbols; }; +struct print_arg_hex { + struct print_arg *field; + struct print_arg *size; +}; + struct print_arg_dynarray { struct format_field *field; struct print_arg *index; @@ -253,6 +258,7 @@ enum print_arg_type { PRINT_FIELD, PRINT_FLAGS, PRINT_SYMBOL, + PRINT_HEX, PRINT_TYPE, PRINT_STRING, PRINT_BSTRING, @@ -270,6 +276,7 @@ struct print_arg { struct print_arg_typecast typecast; struct print_arg_flags flags; struct print_arg_symbol symbol; + struct print_arg_hex hex; struct print_arg_func func; struct print_arg_string string; struct print_arg_op op; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index b3620fe12763..02dfa19a467f 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -209,6 +209,10 @@ static void define_event_symbols(struct event_format *event, define_symbolic_values(args->symbol.symbols, ev_name, cur_field_name); break; + case PRINT_HEX: + define_event_symbols(event, ev_name, args->hex.field); + define_event_symbols(event, ev_name, args->hex.size); + break; case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: case PRINT_STRING: diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index a8ca2f8179a9..ce4d1b0c3862 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -166,6 +166,10 @@ static void define_event_symbols(struct event_format *event, define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name, cur_field_name); break; + case PRINT_HEX: + define_event_symbols(event, ev_name, args->hex.field); + define_event_symbols(event, ev_name, args->hex.size); + break; case PRINT_STRING: break; case PRINT_TYPE: -- cgit v1.2.3 From 50d8f9e393c5a1a8864fda75e3a9f2b800497a61 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 11 Jun 2012 15:28:53 +0900 Subject: tools lib traceevent: Replace malloc_or_die to plain malloc in alloc_event() Because the only caller of the alloc_event() (pevent_parse_event) checks return value properly, it can be changed to use plain malloc. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1339396133-9839-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index b1abd39923d6..83f0a8add177 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -616,7 +616,9 @@ static struct event_format *alloc_event(void) { struct event_format *event; - event = malloc_or_die(sizeof(*event)); + event = malloc(sizeof(*event)); + if (!event) + return NULL; memset(event, 0, sizeof(*event)); return event; -- cgit v1.2.3 From 7582732f57e547929d4c65ad8e38f3446e0538d8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 29 Jun 2012 09:22:54 +0200 Subject: perf tools: Fix hw breakpoint's type modifier parsing Fixing the hw breakpoint's type modifier parsing to allow all possible combinations of 'rwx' characters. Adding automated tests to the parsing test suite. Reported-by: Jovi Zhang Original-patch-by: Namhyung Kim Signed-off-by: Jiri Olsa Cc: Jovi Zhang Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120629072254.GA940@krava.brq.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events-test.c | 37 +++++++++++++++++++++++++++++++++++++ tools/perf/util/parse-events.c | 16 +++++++++++++--- tools/perf/util/parse-events.l | 2 +- 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index a0f61a2a6835..de81fe1f9329 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -181,6 +181,22 @@ static int test__checkevent_breakpoint_w(struct perf_evlist *evlist) return 0; } +static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", + PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", + (HW_BREAKPOINT_R|HW_BREAKPOINT_W) == evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", + HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len); + return 0; +} + static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist) { struct perf_evsel *evsel = list_entry(evlist->entries.next, @@ -352,6 +368,19 @@ static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist) return test__checkevent_breakpoint_w(evlist); } +static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = list_entry(evlist->entries.next, + struct perf_evsel, node); + + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + + return test__checkevent_breakpoint_rw(evlist); +} + static int test__checkevent_pmu(struct perf_evlist *evlist) { @@ -585,6 +614,14 @@ static struct test__event_st test__events[] = { .name = "instructions:H", .check = test__checkevent_exclude_guest_modifier, }, + [26] = { + .name = "mem:0:rw", + .check = test__checkevent_breakpoint_rw, + }, + [27] = { + .name = "mem:0:rw:kp", + .check = test__checkevent_breakpoint_rw_modifier, + }, }; #define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st)) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 0cc27da30ddb..7ae76af709f1 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -383,21 +383,31 @@ parse_breakpoint_type(const char *type, struct perf_event_attr *attr) if (!type || !type[i]) break; +#define CHECK_SET_TYPE(bit) \ +do { \ + if (attr->bp_type & bit) \ + return -EINVAL; \ + else \ + attr->bp_type |= bit; \ +} while (0) + switch (type[i]) { case 'r': - attr->bp_type |= HW_BREAKPOINT_R; + CHECK_SET_TYPE(HW_BREAKPOINT_R); break; case 'w': - attr->bp_type |= HW_BREAKPOINT_W; + CHECK_SET_TYPE(HW_BREAKPOINT_W); break; case 'x': - attr->bp_type |= HW_BREAKPOINT_X; + CHECK_SET_TYPE(HW_BREAKPOINT_X); break; default: return -EINVAL; } } +#undef CHECK_SET_TYPE + if (!attr->bp_type) /* Default */ attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 488362e14133..a06689474210 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -76,7 +76,7 @@ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?]* modifier_event [ukhpGH]{1,8} -modifier_bp [rwx] +modifier_bp [rwx]{1,3} %% -- cgit v1.2.3 From 287e74aa3db097469bdca401f33f00ef20dc710d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 28 Jun 2012 23:18:49 +0200 Subject: perf evsel: Handle hw breakpoints event names in perf_evsel__name() Adding hw breakpoint events hook in the perf_evsel__name function, to display event names properly all over the perf tools. Updated hw breakpoints events tests. Signed-off-by: Jiri Olsa Cc: Jovi Zhang Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1340918329-3012-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 30 ++++++++++++++++++++++++++++++ tools/perf/util/parse-events-test.c | 10 ++++++++++ tools/perf/util/parse-events.c | 4 +--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3d1f6968f175..e81771364867 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -15,6 +15,7 @@ #include "cpumap.h" #include "thread_map.h" #include "target.h" +#include "../../../include/linux/hw_breakpoint.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0)) @@ -152,6 +153,31 @@ static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size) return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } +static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) +{ + int r; + + r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr); + + if (type & HW_BREAKPOINT_R) + r += scnprintf(bf + r, size - r, "r"); + + if (type & HW_BREAKPOINT_W) + r += scnprintf(bf + r, size - r, "w"); + + if (type & HW_BREAKPOINT_X) + r += scnprintf(bf + r, size - r, "x"); + + return r; +} + +static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size) +{ + struct perf_event_attr *attr = &evsel->attr; + int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type); + return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); +} + const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] [PERF_EVSEL__MAX_ALIASES] = { { "L1-dcache", "l1-d", "l1d", "L1-data", }, @@ -285,6 +311,10 @@ const char *perf_evsel__name(struct perf_evsel *evsel) scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint"); break; + case PERF_TYPE_BREAKPOINT: + perf_evsel__bp_name(evsel, bf, sizeof(bf)); + break; + default: scnprintf(bf, sizeof(bf), "%s", "unknown attr type"); break; diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index de81fe1f9329..dd0c306a0698 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -325,6 +325,8 @@ static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "mem:0x0:rw:u")); return test__checkevent_breakpoint(evlist); } @@ -338,6 +340,8 @@ static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "mem:0x0:x:k")); return test__checkevent_breakpoint_x(evlist); } @@ -351,6 +355,8 @@ static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "mem:0x0:r:hp")); return test__checkevent_breakpoint_r(evlist); } @@ -364,6 +370,8 @@ static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "mem:0x0:w:up")); return test__checkevent_breakpoint_w(evlist); } @@ -377,6 +385,8 @@ static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong name", + !strcmp(perf_evsel__name(evsel), "mem:0x0:rw:kp")); return test__checkevent_breakpoint_rw(evlist); } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7ae76af709f1..1dc44dc69133 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -418,7 +418,6 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx, void *ptr, char *type) { struct perf_event_attr attr; - char name[MAX_NAME_LEN]; memset(&attr, 0, sizeof(attr)); attr.bp_addr = (unsigned long) ptr; @@ -437,8 +436,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx, attr.type = PERF_TYPE_BREAKPOINT; - snprintf(name, MAX_NAME_LEN, "mem:%p:%s", ptr, type ? type : "rw"); - return add_event(list, idx, &attr, name); + return add_event(list, idx, &attr, NULL); } static int config_term(struct perf_event_attr *attr, -- cgit v1.2.3 From 9bc8f9fe2c6e3778202c76ef85ef291567c00cb8 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 14 Jun 2012 22:38:37 +0200 Subject: perf tools: Fix generation of pmu list The internal pmu list was never used. With each perf_pmu__find() call the pmu structure was created new by parsing sysfs. Beside this it caused memory leaks. We now keep all pmus by adding them to the list. Also, pmu_lookup() should return pmus that do not expose the format specifier in sysfs. We need a valid internal pmu list in a later patch to iterate over all pmus that exist in the system. Signed-off-by: Robert Richter Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1339706321-8802-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 74d0948ec368..67715a42cd6d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -72,7 +72,7 @@ static int pmu_format(char *name, struct list_head *format) "%s/bus/event_source/devices/%s/format", sysfs, name); if (stat(path, &st) < 0) - return -1; + return 0; /* no error if format does not exist */ if (pmu_format_parse(path, format)) return -1; @@ -252,6 +252,7 @@ static struct perf_pmu *pmu_lookup(char *name) list_splice(&aliases, &pmu->aliases); pmu->name = strdup(name); pmu->type = type; + list_add_tail(&pmu->list, &pmus); return pmu; } -- cgit v1.2.3 From 1388d715dd7d0f494c93dfdef6ab26719218b868 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 19 Jun 2012 17:48:05 +0200 Subject: perf symbols: Add '.note' check into search for NOTE section Adding '.note' section name to be check when looking for notes section. The '.note' name is used by kernel VDSO. Signed-off-by: Jiri Olsa Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1340120894-9465-15-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 994f4ffdcd05..50958bbeb26a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1478,14 +1478,31 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size) goto out; } - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".note.gnu.build-id", NULL); - if (sec == NULL) { + /* + * Check following sections for notes: + * '.note.gnu.build-id' + * '.notes' + * '.note' (VDSO specific) + */ + do { + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note.gnu.build-id", NULL); + if (sec) + break; + sec = elf_section_by_name(elf, &ehdr, &shdr, ".notes", NULL); - if (sec == NULL) - goto out; - } + if (sec) + break; + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note", NULL); + if (sec) + break; + + return err; + + } while (0); data = elf_getdata(sec, NULL); if (data == NULL) -- cgit v1.2.3 From 339ce005091b156c2af4c016c6ba9c1f87cd826a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 19 Jun 2012 17:48:11 +0200 Subject: perf tools: Adding round_up/round_down macros Adding round_up and round_down macros. They will be used in upcoming patches. Signed-off-by: Jiri Olsa Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1340120894-9465-21-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/linux/kernel.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index 1eb804fd3fbf..b6842c1d02a8 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -108,4 +108,14 @@ int eprintf(int level, #define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__) #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__) +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + #endif -- cgit v1.2.3 From 954e482bde20b0e208fd4d34ef26e10afd194600 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 24 May 2012 18:19:45 -0700 Subject: x86/copy_user_generic: Optimize copy_user_generic with CPU erms feature According to Intel 64 and IA-32 SDM and Optimization Reference Manual, beginning with Ivybridge, REG string operation using MOVSB and STOSB can provide both flexible and high-performance REG string operations in cases like memory copy. Enhancement availability is indicated by CPUID.7.0.EBX[9] (Enhanced REP MOVSB/ STOSB). If CPU erms feature is detected, patch copy_user_generic with enhanced fast string version of copy_user_generic. A few new macros are defined to reduce duplicate code in ALTERNATIVE and ALTERNATIVE_2. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1337908785-14015-1-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative.h | 74 ++++++++++++++++++++++++++++++-------- arch/x86/include/asm/uaccess_64.h | 11 +++++- arch/x86/kernel/x8664_ksyms_64.c | 1 + 3 files changed, 70 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 49331bedc158..70780689599a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -75,23 +75,54 @@ static inline int alternatives_text_reserved(void *start, void *end) } #endif /* CONFIG_SMP */ +#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n" + +#define b_replacement(number) "663"#number +#define e_replacement(number) "664"#number + +#define alt_slen "662b-661b" +#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" + +#define ALTINSTR_ENTRY(feature, number) \ + " .long 661b - .\n" /* label */ \ + " .long " b_replacement(number)"f - .\n" /* new instruction */ \ + " .word " __stringify(feature) "\n" /* feature bit */ \ + " .byte " alt_slen "\n" /* source len */ \ + " .byte " alt_rlen(number) "\n" /* replacement len */ + +#define DISCARD_ENTRY(number) /* rlen <= slen */ \ + " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n" + +#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \ + b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t" + /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ - \ - "661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .long 661b - .\n" /* label */ \ - " .long 663f - .\n" /* new instruction */ \ - " .word " __stringify(feature) "\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .discard,\"aw\",@progbits\n" \ - " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ - ".previous\n" \ - ".section .altinstr_replacement, \"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" + OLDINSTR(oldinstr) \ + ".section .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature, 1) \ + ".previous\n" \ + ".section .discard,\"aw\",@progbits\n" \ + DISCARD_ENTRY(1) \ + ".previous\n" \ + ".section .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ + ".previous" + +#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ + OLDINSTR(oldinstr) \ + ".section .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ + ".previous\n" \ + ".section .discard,\"aw\",@progbits\n" \ + DISCARD_ENTRY(1) \ + DISCARD_ENTRY(2) \ + ".previous\n" \ + ".section .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ + ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ".previous" /* * This must be included *after* the definition of ALTERNATIVE due to @@ -139,6 +170,19 @@ static inline int alternatives_text_reserved(void *start, void *end) asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) +/* + * Like alternative_call, but there are two features and respective functions. + * If CPU has feature2, function2 is used. + * Otherwise, if CPU has feature1, function1 is used. + * Otherwise, old function is used. + */ +#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ + output, input...) \ + asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ + "call %P[new2]", feature2) \ + : output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ + [new2] "i" (newfunc2), ## input) + /* * use this macro(s) if you need more than one output parameter * in alternative_io diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 8e796fbbf9c6..d8def8b3dba0 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -17,6 +17,8 @@ /* Handles exceptions in both to and from, but doesn't do access_ok */ __must_check unsigned long +copy_user_enhanced_fast_string(void *to, const void *from, unsigned len); +__must_check unsigned long copy_user_generic_string(void *to, const void *from, unsigned len); __must_check unsigned long copy_user_generic_unrolled(void *to, const void *from, unsigned len); @@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len) { unsigned ret; - alternative_call(copy_user_generic_unrolled, + /* + * If CPU has ERMS feature, use copy_user_enhanced_fast_string. + * Otherwise, if CPU has rep_good feature, use copy_user_generic_string. + * Otherwise, use copy_user_generic_unrolled. + */ + alternative_call_2(copy_user_generic_unrolled, copy_user_generic_string, X86_FEATURE_REP_GOOD, + copy_user_enhanced_fast_string, + X86_FEATURE_ERMS, ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), "=d" (len)), "1" (to), "2" (from), "3" (len) diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 9796c2f3d074..6020f6f5927c 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -28,6 +28,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic_string); EXPORT_SYMBOL(copy_user_generic_unrolled); +EXPORT_SYMBOL(copy_user_enhanced_fast_string); EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(_copy_to_user); -- cgit v1.2.3 From c9fc3f778a6a215ace14ee556067c73982b6d40f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 21 Jun 2012 14:07:16 +0200 Subject: x86, microcode: Sanitize per-cpu microcode reloading interface Microcode reloading in a per-core manner is a very bad idea for both major x86 vendors. And the thing is, we have such interface with which we can end up with different microcode versions applied on different cores of an otherwise homogeneous wrt (family,model,stepping) system. So turn off the possibility of doing that per core and allow it only system-wide. This is a minimal fix which we'd like to see in stable too thus the more-or-less arbitrary decision to allow system-wide reloading only on the BSP: $ echo 1 > /sys/devices/system/cpu/cpu0/microcode/reload ... and disable the interface on the other cores: $ echo 1 > /sys/devices/system/cpu/cpu23/microcode/reload -bash: echo: write error: Invalid argument Also, allowing the reload only from one CPU (the BSP in that case) doesn't allow the reload procedure to degenerate into an O(n^2) deal when triggering reloads from all /sys/devices/system/cpu/cpuX/microcode/reload sysfs nodes simultaneously. A more generic fix will follow. Cc: Henrique de Moraes Holschuh Cc: Peter Zijlstra Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1340280437-7718-2-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin Cc: --- arch/x86/kernel/microcode_core.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index fbdfc6917180..24b852b61be3 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -298,19 +298,31 @@ static ssize_t reload_store(struct device *dev, const char *buf, size_t size) { unsigned long val; - int cpu = dev->id; - ssize_t ret = 0; + int cpu; + ssize_t ret = 0, tmp_ret; + + /* allow reload only from the BSP */ + if (boot_cpu_data.cpu_index != dev->id) + return -EINVAL; ret = kstrtoul(buf, 0, &val); if (ret) return ret; - if (val == 1) { - get_online_cpus(); - if (cpu_online(cpu)) - ret = reload_for_cpu(cpu); - put_online_cpus(); + if (val != 1) + return size; + + get_online_cpus(); + for_each_online_cpu(cpu) { + tmp_ret = reload_for_cpu(cpu); + if (tmp_ret != 0) + pr_warn("Error reloading microcode on CPU %d\n", cpu); + + /* save retval of the first encountered reload error */ + if (!ret) + ret = tmp_ret; } + put_online_cpus(); if (!ret) ret = size; -- cgit v1.2.3 From 3d8986bc7f309483ee09c7a02888bab09072c19b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 21 Jun 2012 14:07:17 +0200 Subject: x86, microcode: Make reload interface per system The reload interface should be per-system so that a full system ucode reload happens (on each core) when doing echo 1 > /sys/devices/system/cpu/microcode/reload Move it to the cpu subsys directory instead of it being per-cpu. Cc: Henrique de Moraes Holschuh Cc: Peter Zijlstra Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1340280437-7718-3-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_core.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 24b852b61be3..947e4c64b1d8 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -301,10 +301,6 @@ static ssize_t reload_store(struct device *dev, int cpu; ssize_t ret = 0, tmp_ret; - /* allow reload only from the BSP */ - if (boot_cpu_data.cpu_index != dev->id) - return -EINVAL; - ret = kstrtoul(buf, 0, &val); if (ret) return ret; @@ -351,7 +347,6 @@ static DEVICE_ATTR(version, 0400, version_show, NULL); static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); static struct attribute *mc_default_attrs[] = { - &dev_attr_reload.attr, &dev_attr_version.attr, &dev_attr_processor_flags.attr, NULL @@ -528,6 +523,16 @@ static const struct x86_cpu_id microcode_id[] = { MODULE_DEVICE_TABLE(x86cpu, microcode_id); #endif +static struct attribute *cpu_root_microcode_attrs[] = { + &dev_attr_reload.attr, + NULL +}; + +static struct attribute_group cpu_root_microcode_group = { + .name = "microcode", + .attrs = cpu_root_microcode_attrs, +}; + static int __init microcode_init(void) { struct cpuinfo_x86 *c = &cpu_data(0); @@ -559,9 +564,17 @@ static int __init microcode_init(void) if (error) goto out_pdev; + error = sysfs_create_group(&cpu_subsys.dev_root->kobj, + &cpu_root_microcode_group); + + if (error) { + pr_err("Error creating microcode group!\n"); + goto out_driver; + } + error = microcode_dev_init(); if (error) - goto out_driver; + goto out_ucode_group; register_syscore_ops(&mc_syscore_ops); register_hotcpu_notifier(&mc_cpu_notifier); @@ -571,7 +584,11 @@ static int __init microcode_init(void) return 0; -out_driver: + out_ucode_group: + sysfs_remove_group(&cpu_subsys.dev_root->kobj, + &cpu_root_microcode_group); + + out_driver: get_online_cpus(); mutex_lock(µcode_mutex); @@ -580,7 +597,7 @@ out_driver: mutex_unlock(µcode_mutex); put_online_cpus(); -out_pdev: + out_pdev: platform_device_unregister(microcode_pdev); return error; @@ -596,6 +613,9 @@ static void __exit microcode_exit(void) unregister_hotcpu_notifier(&mc_cpu_notifier); unregister_syscore_ops(&mc_syscore_ops); + sysfs_remove_group(&cpu_subsys.dev_root->kobj, + &cpu_root_microcode_group); + get_online_cpus(); mutex_lock(µcode_mutex); -- cgit v1.2.3 From 17d7a1123f0f6d532830152564cc812cc73db2f3 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Mon, 2 Jul 2012 22:46:17 +0900 Subject: perf bench: Fix confused variable namings and descriptions in mem subsystem As Namhyung Kim pointed, there are confused namings and descriptions of words "cycle" and "clock" in mem-memset.c and mem-memcpy.c. With the option "-c" (or "--clock", now renamed as "--cycle"), mem subsystem measures cost of memset() and memcpy() with cpu-cycles event. But current mem subsystem source code contains lots of confused variable namings and descriptions with "clock" (e.g. the variable use_clock). This is a very bad style because there is another software event named "cpu-clock". This patch replaces wrong usage of "clock" to "cycle". v2: modified Documentation/perf-bench.txt for the descriptions of --cycle option Signed-off-by: Hitoshi Mitake Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1341236777-18457-1-git-send-email-h.mitake@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-bench.txt | 4 +- tools/perf/bench/mem-memcpy.c | 80 ++++++++++++++++----------------- tools/perf/bench/mem-memset.c | 80 ++++++++++++++++----------------- 3 files changed, 82 insertions(+), 82 deletions(-) diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index f3c716a4cad3..7065cd6fbdfc 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -144,7 +144,7 @@ On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported. Repeat memcpy invocation this number of times. -c:: ---clock:: +--cycle:: Use perf's cpu-cycles event instead of gettimeofday syscall. -o:: @@ -176,7 +176,7 @@ On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported. Repeat memset invocation this number of times. -c:: ---clock:: +--cycle:: Use perf's cpu-cycles event instead of gettimeofday syscall. -o:: diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index d990365cafa0..02dad5d3359b 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -24,8 +24,8 @@ static const char *length_str = "1MB"; static const char *routine = "default"; static int iterations = 1; -static bool use_clock; -static int clock_fd; +static bool use_cycle; +static int cycle_fd; static bool only_prefault; static bool no_prefault; @@ -37,7 +37,7 @@ static const struct option options[] = { "Specify routine to copy"), OPT_INTEGER('i', "iterations", &iterations, "repeat memcpy() invocation this number of times"), - OPT_BOOLEAN('c', "clock", &use_clock, + OPT_BOOLEAN('c', "cycle", &use_cycle, "Use cycles event instead of gettimeofday() for measuring"), OPT_BOOLEAN('o', "only-prefault", &only_prefault, "Show only the result with page faults before memcpy()"), @@ -76,27 +76,27 @@ static const char * const bench_mem_memcpy_usage[] = { NULL }; -static struct perf_event_attr clock_attr = { +static struct perf_event_attr cycle_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }; -static void init_clock(void) +static void init_cycle(void) { - clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0); - if (clock_fd < 0 && errno == ENOSYS) + if (cycle_fd < 0 && errno == ENOSYS) die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); else - BUG_ON(clock_fd < 0); + BUG_ON(cycle_fd < 0); } -static u64 get_clock(void) +static u64 get_cycle(void) { int ret; u64 clk; - ret = read(clock_fd, &clk, sizeof(u64)); + ret = read(cycle_fd, &clk, sizeof(u64)); BUG_ON(ret != sizeof(u64)); return clk; @@ -119,9 +119,9 @@ static void alloc_mem(void **dst, void **src, size_t length) die("memory allocation failed - maybe length is too large?\n"); } -static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) +static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault) { - u64 clock_start = 0ULL, clock_end = 0ULL; + u64 cycle_start = 0ULL, cycle_end = 0ULL; void *src = NULL, *dst = NULL; int i; @@ -130,14 +130,14 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) if (prefault) fn(dst, src, len); - clock_start = get_clock(); + cycle_start = get_cycle(); for (i = 0; i < iterations; ++i) fn(dst, src, len); - clock_end = get_clock(); + cycle_end = get_cycle(); free(src); free(dst); - return clock_end - clock_start; + return cycle_end - cycle_start; } static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) @@ -182,17 +182,17 @@ int bench_mem_memcpy(int argc, const char **argv, int i; size_t len; double result_bps[2]; - u64 result_clock[2]; + u64 result_cycle[2]; argc = parse_options(argc, argv, options, bench_mem_memcpy_usage, 0); - if (use_clock) - init_clock(); + if (use_cycle) + init_cycle(); len = (size_t)perf_atoll((char *)length_str); - result_clock[0] = result_clock[1] = 0ULL; + result_cycle[0] = result_cycle[1] = 0ULL; result_bps[0] = result_bps[1] = 0.0; if ((s64)len <= 0) { @@ -223,11 +223,11 @@ int bench_mem_memcpy(int argc, const char **argv, if (!only_prefault && !no_prefault) { /* show both of results */ - if (use_clock) { - result_clock[0] = - do_memcpy_clock(routines[i].fn, len, false); - result_clock[1] = - do_memcpy_clock(routines[i].fn, len, true); + if (use_cycle) { + result_cycle[0] = + do_memcpy_cycle(routines[i].fn, len, false); + result_cycle[1] = + do_memcpy_cycle(routines[i].fn, len, true); } else { result_bps[0] = do_memcpy_gettimeofday(routines[i].fn, @@ -237,9 +237,9 @@ int bench_mem_memcpy(int argc, const char **argv, len, true); } } else { - if (use_clock) { - result_clock[pf] = - do_memcpy_clock(routines[i].fn, + if (use_cycle) { + result_cycle[pf] = + do_memcpy_cycle(routines[i].fn, len, only_prefault); } else { result_bps[pf] = @@ -251,12 +251,12 @@ int bench_mem_memcpy(int argc, const char **argv, switch (bench_format) { case BENCH_FORMAT_DEFAULT: if (!only_prefault && !no_prefault) { - if (use_clock) { - printf(" %14lf Clock/Byte\n", - (double)result_clock[0] + if (use_cycle) { + printf(" %14lf Cycle/Byte\n", + (double)result_cycle[0] / (double)len); - printf(" %14lf Clock/Byte (with prefault)\n", - (double)result_clock[1] + printf(" %14lf Cycle/Byte (with prefault)\n", + (double)result_cycle[1] / (double)len); } else { print_bps(result_bps[0]); @@ -265,9 +265,9 @@ int bench_mem_memcpy(int argc, const char **argv, printf(" (with prefault)\n"); } } else { - if (use_clock) { - printf(" %14lf Clock/Byte", - (double)result_clock[pf] + if (use_cycle) { + printf(" %14lf Cycle/Byte", + (double)result_cycle[pf] / (double)len); } else print_bps(result_bps[pf]); @@ -277,17 +277,17 @@ int bench_mem_memcpy(int argc, const char **argv, break; case BENCH_FORMAT_SIMPLE: if (!only_prefault && !no_prefault) { - if (use_clock) { + if (use_cycle) { printf("%lf %lf\n", - (double)result_clock[0] / (double)len, - (double)result_clock[1] / (double)len); + (double)result_cycle[0] / (double)len, + (double)result_cycle[1] / (double)len); } else { printf("%lf %lf\n", result_bps[0], result_bps[1]); } } else { - if (use_clock) { - printf("%lf\n", (double)result_clock[pf] + if (use_cycle) { + printf("%lf\n", (double)result_cycle[pf] / (double)len); } else printf("%lf\n", result_bps[pf]); diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index bf0d5f552017..350cc9557265 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -24,8 +24,8 @@ static const char *length_str = "1MB"; static const char *routine = "default"; static int iterations = 1; -static bool use_clock; -static int clock_fd; +static bool use_cycle; +static int cycle_fd; static bool only_prefault; static bool no_prefault; @@ -37,7 +37,7 @@ static const struct option options[] = { "Specify routine to set"), OPT_INTEGER('i', "iterations", &iterations, "repeat memset() invocation this number of times"), - OPT_BOOLEAN('c', "clock", &use_clock, + OPT_BOOLEAN('c', "cycle", &use_cycle, "Use cycles event instead of gettimeofday() for measuring"), OPT_BOOLEAN('o', "only-prefault", &only_prefault, "Show only the result with page faults before memset()"), @@ -76,27 +76,27 @@ static const char * const bench_mem_memset_usage[] = { NULL }; -static struct perf_event_attr clock_attr = { +static struct perf_event_attr cycle_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }; -static void init_clock(void) +static void init_cycle(void) { - clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0); - if (clock_fd < 0 && errno == ENOSYS) + if (cycle_fd < 0 && errno == ENOSYS) die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); else - BUG_ON(clock_fd < 0); + BUG_ON(cycle_fd < 0); } -static u64 get_clock(void) +static u64 get_cycle(void) { int ret; u64 clk; - ret = read(clock_fd, &clk, sizeof(u64)); + ret = read(cycle_fd, &clk, sizeof(u64)); BUG_ON(ret != sizeof(u64)); return clk; @@ -115,9 +115,9 @@ static void alloc_mem(void **dst, size_t length) die("memory allocation failed - maybe length is too large?\n"); } -static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) +static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault) { - u64 clock_start = 0ULL, clock_end = 0ULL; + u64 cycle_start = 0ULL, cycle_end = 0ULL; void *dst = NULL; int i; @@ -126,13 +126,13 @@ static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) if (prefault) fn(dst, -1, len); - clock_start = get_clock(); + cycle_start = get_cycle(); for (i = 0; i < iterations; ++i) fn(dst, i, len); - clock_end = get_clock(); + cycle_end = get_cycle(); free(dst); - return clock_end - clock_start; + return cycle_end - cycle_start; } static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) @@ -176,17 +176,17 @@ int bench_mem_memset(int argc, const char **argv, int i; size_t len; double result_bps[2]; - u64 result_clock[2]; + u64 result_cycle[2]; argc = parse_options(argc, argv, options, bench_mem_memset_usage, 0); - if (use_clock) - init_clock(); + if (use_cycle) + init_cycle(); len = (size_t)perf_atoll((char *)length_str); - result_clock[0] = result_clock[1] = 0ULL; + result_cycle[0] = result_cycle[1] = 0ULL; result_bps[0] = result_bps[1] = 0.0; if ((s64)len <= 0) { @@ -217,11 +217,11 @@ int bench_mem_memset(int argc, const char **argv, if (!only_prefault && !no_prefault) { /* show both of results */ - if (use_clock) { - result_clock[0] = - do_memset_clock(routines[i].fn, len, false); - result_clock[1] = - do_memset_clock(routines[i].fn, len, true); + if (use_cycle) { + result_cycle[0] = + do_memset_cycle(routines[i].fn, len, false); + result_cycle[1] = + do_memset_cycle(routines[i].fn, len, true); } else { result_bps[0] = do_memset_gettimeofday(routines[i].fn, @@ -231,9 +231,9 @@ int bench_mem_memset(int argc, const char **argv, len, true); } } else { - if (use_clock) { - result_clock[pf] = - do_memset_clock(routines[i].fn, + if (use_cycle) { + result_cycle[pf] = + do_memset_cycle(routines[i].fn, len, only_prefault); } else { result_bps[pf] = @@ -245,12 +245,12 @@ int bench_mem_memset(int argc, const char **argv, switch (bench_format) { case BENCH_FORMAT_DEFAULT: if (!only_prefault && !no_prefault) { - if (use_clock) { - printf(" %14lf Clock/Byte\n", - (double)result_clock[0] + if (use_cycle) { + printf(" %14lf Cycle/Byte\n", + (double)result_cycle[0] / (double)len); - printf(" %14lf Clock/Byte (with prefault)\n ", - (double)result_clock[1] + printf(" %14lf Cycle/Byte (with prefault)\n ", + (double)result_cycle[1] / (double)len); } else { print_bps(result_bps[0]); @@ -259,9 +259,9 @@ int bench_mem_memset(int argc, const char **argv, printf(" (with prefault)\n"); } } else { - if (use_clock) { - printf(" %14lf Clock/Byte", - (double)result_clock[pf] + if (use_cycle) { + printf(" %14lf Cycle/Byte", + (double)result_cycle[pf] / (double)len); } else print_bps(result_bps[pf]); @@ -271,17 +271,17 @@ int bench_mem_memset(int argc, const char **argv, break; case BENCH_FORMAT_SIMPLE: if (!only_prefault && !no_prefault) { - if (use_clock) { + if (use_cycle) { printf("%lf %lf\n", - (double)result_clock[0] / (double)len, - (double)result_clock[1] / (double)len); + (double)result_cycle[0] / (double)len, + (double)result_cycle[1] / (double)len); } else { printf("%lf %lf\n", result_bps[0], result_bps[1]); } } else { - if (use_clock) { - printf("%lf\n", (double)result_clock[pf] + if (use_cycle) { + printf("%lf\n", (double)result_cycle[pf] / (double)len); } else printf("%lf\n", result_bps[pf]); -- cgit v1.2.3 From f885b7f2b2de70be266d2cecc476f773a1e2ca5d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 23 Apr 2012 15:52:53 -0700 Subject: rcu: Control RCU_FANOUT_LEAF from boot-time parameter Although making RCU_FANOUT_LEAF a kernel configuration parameter rather than a fixed constant makes it easier for people to decrease cache-miss overhead for large systems, it is of little help for people who must run a single pre-built kernel binary. This commit therefore allows the value of RCU_FANOUT_LEAF to be increased (but not decreased!) via a boot-time parameter named rcutree.rcu_fanout_leaf. Reported-by: Mike Galbraith Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 5 ++ kernel/rcutree.c | 97 +++++++++++++++++++++++++++++++------ kernel/rcutree.h | 23 +++++---- kernel/rcutree_plugin.h | 2 + kernel/rcutree_trace.c | 2 +- 5 files changed, 104 insertions(+), 25 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a92c5ebf373e..12783fa833c3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Set maximum number of finished RCU callbacks to process in one batch. + rcutree.fanout_leaf= [KNL,BOOT] + Increase the number of CPUs assigned to each + leaf rcu_node structure. Useful for very large + systems. + rcutree.qhimark= [KNL,BOOT] Set threshold of queued RCU callbacks over which batch limiting is disabled. diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396e..a4c592b66e10 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -60,17 +60,10 @@ /* Data structures. */ -static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; +static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; #define RCU_STATE_INITIALIZER(structname) { \ .level = { &structname##_state.node[0] }, \ - .levelcnt = { \ - NUM_RCU_LVL_0, /* root of hierarchy. */ \ - NUM_RCU_LVL_1, \ - NUM_RCU_LVL_2, \ - NUM_RCU_LVL_3, \ - NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ - }, \ .fqs_state = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ @@ -91,6 +84,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; +/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ +static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; +module_param(rcu_fanout_leaf, int, 0); +int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; +static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ + NUM_RCU_LVL_0, + NUM_RCU_LVL_1, + NUM_RCU_LVL_2, + NUM_RCU_LVL_3, + NUM_RCU_LVL_4, +}; +int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ + /* * The rcu_scheduler_active variable transitions from zero to one just * before the first task is spawned. So when this variable is zero, RCU @@ -2574,9 +2580,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) { int i; - for (i = NUM_RCU_LVLS - 1; i > 0; i--) + for (i = rcu_num_lvls - 1; i > 0; i--) rsp->levelspread[i] = CONFIG_RCU_FANOUT; - rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; + rsp->levelspread[0] = rcu_fanout_leaf; } #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ static void __init rcu_init_levelspread(struct rcu_state *rsp) @@ -2586,7 +2592,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) int i; cprv = NR_CPUS; - for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + for (i = rcu_num_lvls - 1; i >= 0; i--) { ccur = rsp->levelcnt[i]; rsp->levelspread[i] = (cprv + ccur - 1) / ccur; cprv = ccur; @@ -2613,13 +2619,15 @@ static void __init rcu_init_one(struct rcu_state *rsp, /* Initialize the level-tracking arrays. */ - for (i = 1; i < NUM_RCU_LVLS; i++) + for (i = 0; i < rcu_num_lvls; i++) + rsp->levelcnt[i] = num_rcu_lvl[i]; + for (i = 1; i < rcu_num_lvls; i++) rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; rcu_init_levelspread(rsp); /* Initialize the elements themselves, starting from the leaves. */ - for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + for (i = rcu_num_lvls - 1; i >= 0; i--) { cpustride *= rsp->levelspread[i]; rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { @@ -2649,7 +2657,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, } rsp->rda = rda; - rnp = rsp->level[NUM_RCU_LVLS - 1]; + rnp = rsp->level[rcu_num_lvls - 1]; for_each_possible_cpu(i) { while (i > rnp->grphi) rnp++; @@ -2658,11 +2666,72 @@ static void __init rcu_init_one(struct rcu_state *rsp, } } +/* + * Compute the rcu_node tree geometry from kernel parameters. This cannot + * replace the definitions in rcutree.h because those are needed to size + * the ->node array in the rcu_state structure. + */ +static void __init rcu_init_geometry(void) +{ + int i; + int j; + int n = NR_CPUS; + int rcu_capacity[MAX_RCU_LVLS + 1]; + + /* If the compile-time values are accurate, just leave. */ + if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) + return; + + /* + * Compute number of nodes that can be handled an rcu_node tree + * with the given number of levels. Setting rcu_capacity[0] makes + * some of the arithmetic easier. + */ + rcu_capacity[0] = 1; + rcu_capacity[1] = rcu_fanout_leaf; + for (i = 2; i <= MAX_RCU_LVLS; i++) + rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; + + /* + * The boot-time rcu_fanout_leaf parameter is only permitted + * to increase the leaf-level fanout, not decrease it. Of course, + * the leaf-level fanout cannot exceed the number of bits in + * the rcu_node masks. Finally, the tree must be able to accommodate + * the configured number of CPUs. Complain and fall back to the + * compile-time values if these limits are exceeded. + */ + if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || + rcu_fanout_leaf > sizeof(unsigned long) * 8 || + n > rcu_capacity[MAX_RCU_LVLS]) { + WARN_ON(1); + return; + } + + /* Calculate the number of rcu_nodes at each level of the tree. */ + for (i = 1; i <= MAX_RCU_LVLS; i++) + if (n <= rcu_capacity[i]) { + for (j = 0; j <= i; j++) + num_rcu_lvl[j] = + DIV_ROUND_UP(n, rcu_capacity[i - j]); + rcu_num_lvls = i; + for (j = i + 1; j <= MAX_RCU_LVLS; j++) + num_rcu_lvl[j] = 0; + break; + } + + /* Calculate the total number of rcu_node structures. */ + rcu_num_nodes = 0; + for (i = 0; i <= MAX_RCU_LVLS; i++) + rcu_num_nodes += num_rcu_lvl[i]; + rcu_num_nodes -= n; +} + void __init rcu_init(void) { int cpu; rcu_bootup_announce(); + rcu_init_geometry(); rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 19b61ac1079f..780a0195d35a 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -42,28 +42,28 @@ #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) #if NR_CPUS <= RCU_FANOUT_1 -# define NUM_RCU_LVLS 1 +# define RCU_NUM_LVLS 1 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 (NR_CPUS) # define NUM_RCU_LVL_2 0 # define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_2 -# define NUM_RCU_LVLS 2 +# define RCU_NUM_LVLS 2 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) # define NUM_RCU_LVL_2 (NR_CPUS) # define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_3 -# define NUM_RCU_LVLS 3 +# define RCU_NUM_LVLS 3 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) # define NUM_RCU_LVL_3 (NR_CPUS) # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_4 -# define NUM_RCU_LVLS 4 +# define RCU_NUM_LVLS 4 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) @@ -76,6 +76,9 @@ #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) +extern int rcu_num_lvls; +extern int rcu_num_nodes; + /* * Dynticks per-CPU state. */ @@ -206,7 +209,7 @@ struct rcu_node { */ #define rcu_for_each_node_breadth_first(rsp, rnp) \ for ((rnp) = &(rsp)->node[0]; \ - (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) /* * Do a breadth-first scan of the non-leaf rcu_node structures for the @@ -215,7 +218,7 @@ struct rcu_node { */ #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ for ((rnp) = &(rsp)->node[0]; \ - (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) + (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) /* * Scan the leaves of the rcu_node hierarchy for the specified rcu_state @@ -224,8 +227,8 @@ struct rcu_node { * It is still a leaf node, even if it is also the root node. */ #define rcu_for_each_leaf_node(rsp, rnp) \ - for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ - (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) + for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) /* Index values for nxttail array in struct rcu_data. */ #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ @@ -357,9 +360,9 @@ do { \ */ struct rcu_state { struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ - struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ + struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */ u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ - u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ + u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ /* The following fields are guarded by the root rcu_node's lock. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3e4899459f3d..fb92b6dd9980 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -70,6 +70,8 @@ static void __init rcu_bootup_announce_oddness(void) #if NUM_RCU_LVL_4 != 0 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); #endif + if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) + printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); } #ifdef CONFIG_TREE_PREEMPT_RCU diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index d4bc16ddd1d4..a3556a2d842c 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -278,7 +278,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) rsp->n_force_qs, rsp->n_force_qs_ngp, rsp->n_force_qs - rsp->n_force_qs_ngp, rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); - for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { + for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { if (rnp->level != level) { seq_puts(m, "\n"); level = rnp->level; -- cgit v1.2.3 From cc5df65b0370fc6aa2bfe3bb19e0451d5cafb99f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 15 Jun 2012 18:16:00 -0700 Subject: rcu: Four-level hierarchy is no longer experimental Time to make the four-level-hierarchy setting less scary, so this commit removes "Experimental" from the boot-time message. Leave the message in order to get a heads-up on any possible need to expand to a five-level hierarchy. Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index fb92b6dd9980..70e0fd256cc6 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -68,7 +68,7 @@ static void __init rcu_bootup_announce_oddness(void) printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n"); #endif #if NUM_RCU_LVL_4 != 0 - printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); + printk(KERN_INFO "\tFour-level hierarchy is enabled.\n"); #endif if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); -- cgit v1.2.3 From cca6f3931920a7547d02e68adc2ca635bea5600c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 8 May 2012 21:00:28 -0700 Subject: rcu: Size rcu_node tree from nr_cpu_ids rather than NR_CPUS The rcu_node tree array is sized based on compile-time constants, including NR_CPUS. Although this approach has worked well in the past, the recent trend by many distros to define NR_CPUS=4096 results in excessive grace-period-initialization latencies. This commit therefore substitutes the run-time computed nr_cpu_ids for the compile-time NR_CPUS when building the tree. This can result in much of the compile-time-allocated rcu_node array being unused. If this is a major problem, you are in a specialized situation anyway, so you can manually adjust the NR_CPUS, RCU_FANOUT, and RCU_FANOUT_LEAF kernel config parameters. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 2 +- kernel/rcutree_plugin.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a4c592b66e10..0fdbc5e07302 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2675,7 +2675,7 @@ static void __init rcu_init_geometry(void) { int i; int j; - int n = NR_CPUS; + int n = nr_cpu_ids; int rcu_capacity[MAX_RCU_LVLS + 1]; /* If the compile-time values are accurate, just leave. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 70e0fd256cc6..ef2b5231afa4 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -72,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void) #endif if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); + if (nr_cpu_ids != NR_CPUS) + printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); } #ifdef CONFIG_TREE_PREEMPT_RCU -- cgit v1.2.3 From 6c90cc7bf077f28144013e949ee0c122012d194a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 May 2012 19:41:41 -0700 Subject: rcu: Prevent excessive line length in RCU_STATE_INITIALIZER() Upcoming rcu_barrier() concurrency commits will result in line lengths greater than 80 characters in the RCU_STATE_INITIALIZER(), so this commit shortens the name of the macro's argument to prevent this. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0fdbc5e07302..dd7fd96c90c5 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -62,18 +62,18 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; -#define RCU_STATE_INITIALIZER(structname) { \ - .level = { &structname##_state.node[0] }, \ +#define RCU_STATE_INITIALIZER(sname) { \ + .level = { &sname##_state.node[0] }, \ .fqs_state = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ - .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ - .orphan_nxttail = &structname##_state.orphan_nxtlist, \ - .orphan_donetail = &structname##_state.orphan_donelist, \ - .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ + .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \ + .orphan_nxttail = &sname##_state.orphan_nxtlist, \ + .orphan_donetail = &sname##_state.orphan_donelist, \ + .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ - .name = #structname, \ + .name = #sname, \ } struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); -- cgit v1.2.3 From 037b64ed0bf2405a1a01542164d3418564b44fff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 May 2012 23:26:01 -0700 Subject: rcu: Place pointer to call_rcu() in rcu_data structure This is a preparatory commit for increasing rcu_barrier()'s concurrency. It adds a pointer in the rcu_data structure to the corresponding call_rcu() function. This allows a pointer to the rcu_data structure to imply the function pointer, which allows _rcu_barrier() state to be placed in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 27 ++++++++++++--------------- kernel/rcutree.h | 2 ++ kernel/rcutree_plugin.h | 5 +++-- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index dd7fd96c90c5..00c518fa34bb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -62,8 +62,9 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; -#define RCU_STATE_INITIALIZER(sname) { \ +#define RCU_STATE_INITIALIZER(sname, cr) { \ .level = { &sname##_state.node[0] }, \ + .call = cr, \ .fqs_state = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ @@ -76,10 +77,11 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; .name = #sname, \ } -struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); +struct rcu_state rcu_sched_state = + RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; @@ -2282,21 +2284,17 @@ static void rcu_barrier_func(void *type) { int cpu = smp_processor_id(); struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); - void (*call_rcu_func)(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + struct rcu_state *rsp = type; atomic_inc(&rcu_barrier_cpu_count); - call_rcu_func = type; - call_rcu_func(head, rcu_barrier_callback); + rsp->call(head, rcu_barrier_callback); } /* * Orchestrate the specified type of RCU barrier, waiting for all * RCU callbacks of the specified type to complete. */ -static void _rcu_barrier(struct rcu_state *rsp, - void (*call_rcu_func)(struct rcu_head *head, - void (*func)(struct rcu_head *head))) +static void _rcu_barrier(struct rcu_state *rsp) { int cpu; unsigned long flags; @@ -2348,8 +2346,7 @@ static void _rcu_barrier(struct rcu_state *rsp, while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) schedule_timeout_interruptible(1); } else if (ACCESS_ONCE(rdp->qlen)) { - smp_call_function_single(cpu, rcu_barrier_func, - (void *)call_rcu_func, 1); + smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); preempt_enable(); } else { preempt_enable(); @@ -2370,7 +2367,7 @@ static void _rcu_barrier(struct rcu_state *rsp, raw_spin_unlock_irqrestore(&rsp->onofflock, flags); atomic_inc(&rcu_barrier_cpu_count); smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ - call_rcu_func(&rh, rcu_barrier_callback); + rsp->call(&rh, rcu_barrier_callback); /* * Now that we have an rcu_barrier_callback() callback on each @@ -2393,7 +2390,7 @@ static void _rcu_barrier(struct rcu_state *rsp, */ void rcu_barrier_bh(void) { - _rcu_barrier(&rcu_bh_state, call_rcu_bh); + _rcu_barrier(&rcu_bh_state); } EXPORT_SYMBOL_GPL(rcu_barrier_bh); @@ -2402,7 +2399,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh); */ void rcu_barrier_sched(void) { - _rcu_barrier(&rcu_sched_state, call_rcu_sched); + _rcu_barrier(&rcu_sched_state); } EXPORT_SYMBOL_GPL(rcu_barrier_sched); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 780a0195d35a..049896a835d9 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -364,6 +364,8 @@ struct rcu_state { u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ + void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ + void (*func)(struct rcu_head *head)); /* The following fields are guarded by the root rcu_node's lock. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ef2b5231afa4..9cb3a68819fa 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -78,7 +78,8 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_TREE_PREEMPT_RCU -struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); +struct rcu_state rcu_preempt_state = + RCU_STATE_INITIALIZER(rcu_preempt, call_rcu); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); static struct rcu_state *rcu_state = &rcu_preempt_state; @@ -944,7 +945,7 @@ static int rcu_preempt_cpu_has_callbacks(int cpu) */ void rcu_barrier(void) { - _rcu_barrier(&rcu_preempt_state, call_rcu); + _rcu_barrier(&rcu_preempt_state); } EXPORT_SYMBOL_GPL(rcu_barrier); -- cgit v1.2.3 From 06668efa9180f4824fe846a8ff96338c18646bc7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 May 2012 23:57:46 -0700 Subject: rcu: Move _rcu_barrier()'s rcu_head structures to rcu_data structures In order for multiple flavors of RCU to each concurrently run one rcu_barrier(), each flavor needs its own per-CPU set of rcu_head structures. This commit therefore moves _rcu_barrier()'s set of per-CPU rcu_head structures from per-CPU variables to the existing per-CPU and per-RCU-flavor rcu_data structures. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 6 ++---- kernel/rcutree.h | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 00c518fa34bb..1e552598b55d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -157,7 +157,6 @@ unsigned long rcutorture_vernum; /* State information for rcu_barrier() and friends. */ -static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; @@ -2282,12 +2281,11 @@ static void rcu_barrier_callback(struct rcu_head *notused) */ static void rcu_barrier_func(void *type) { - int cpu = smp_processor_id(); - struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); struct rcu_state *rsp = type; + struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); atomic_inc(&rcu_barrier_cpu_count); - rsp->call(head, rcu_barrier_callback); + rsp->call(&rdp->barrier_head, rcu_barrier_callback); } /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 049896a835d9..586d93c978f2 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -314,6 +314,9 @@ struct rcu_data { unsigned long n_rp_need_fqs; unsigned long n_rp_need_nothing; + /* 6) _rcu_barrier() callback. */ + struct rcu_head barrier_head; + int cpu; struct rcu_state *rsp; }; -- cgit v1.2.3 From 24ebbca8ecdd5129d7f829a7cb5146aaeb531f77 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 00:34:56 -0700 Subject: rcu: Move rcu_barrier_cpu_count to rcu_state structure In order to allow each RCU flavor to concurrently execute its rcu_barrier() function, it is necessary to move the relevant state to the rcu_state structure. This commit therefore moves the rcu_barrier_cpu_count global variable to a new ->barrier_cpu_count field in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 25 ++++++++++++++----------- kernel/rcutree.h | 1 + 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 1e552598b55d..5929b021666d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -157,7 +157,6 @@ unsigned long rcutorture_vernum; /* State information for rcu_barrier() and friends. */ -static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; @@ -2270,9 +2269,12 @@ static int rcu_cpu_has_callbacks(int cpu) * RCU callback function for _rcu_barrier(). If we are last, wake * up the task executing _rcu_barrier(). */ -static void rcu_barrier_callback(struct rcu_head *notused) +static void rcu_barrier_callback(struct rcu_head *rhp) { - if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); + struct rcu_state *rsp = rdp->rsp; + + if (atomic_dec_and_test(&rsp->barrier_cpu_count)) complete(&rcu_barrier_completion); } @@ -2284,7 +2286,7 @@ static void rcu_barrier_func(void *type) struct rcu_state *rsp = type; struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); - atomic_inc(&rcu_barrier_cpu_count); + atomic_inc(&rsp->barrier_cpu_count); rsp->call(&rdp->barrier_head, rcu_barrier_callback); } @@ -2297,9 +2299,9 @@ static void _rcu_barrier(struct rcu_state *rsp) int cpu; unsigned long flags; struct rcu_data *rdp; - struct rcu_head rh; + struct rcu_data rd; - init_rcu_head_on_stack(&rh); + init_rcu_head_on_stack(&rd.barrier_head); /* Take mutex to serialize concurrent rcu_barrier() requests. */ mutex_lock(&rcu_barrier_mutex); @@ -2324,7 +2326,7 @@ static void _rcu_barrier(struct rcu_state *rsp) * us -- but before CPU 1's orphaned callbacks are invoked!!! */ init_completion(&rcu_barrier_completion); - atomic_set(&rcu_barrier_cpu_count, 1); + atomic_set(&rsp->barrier_cpu_count, 1); raw_spin_lock_irqsave(&rsp->onofflock, flags); rsp->rcu_barrier_in_progress = current; raw_spin_unlock_irqrestore(&rsp->onofflock, flags); @@ -2363,15 +2365,16 @@ static void _rcu_barrier(struct rcu_state *rsp) rcu_adopt_orphan_cbs(rsp); rsp->rcu_barrier_in_progress = NULL; raw_spin_unlock_irqrestore(&rsp->onofflock, flags); - atomic_inc(&rcu_barrier_cpu_count); + atomic_inc(&rsp->barrier_cpu_count); smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ - rsp->call(&rh, rcu_barrier_callback); + rd.rsp = rsp; + rsp->call(&rd.barrier_head, rcu_barrier_callback); /* * Now that we have an rcu_barrier_callback() callback on each * CPU, and thus each counted, remove the initial count. */ - if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + if (atomic_dec_and_test(&rsp->barrier_cpu_count)) complete(&rcu_barrier_completion); /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ @@ -2380,7 +2383,7 @@ static void _rcu_barrier(struct rcu_state *rsp) /* Other rcu_barrier() invocations can now safely proceed. */ mutex_unlock(&rcu_barrier_mutex); - destroy_rcu_head_on_stack(&rh); + destroy_rcu_head_on_stack(&rd.barrier_head); } /** diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 586d93c978f2..c57ef0b7f097 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -400,6 +400,7 @@ struct rcu_state { struct task_struct *rcu_barrier_in_progress; /* Task doing rcu_barrier(), */ /* or NULL if no barrier. */ + atomic_t barrier_cpu_count; /* # CPUs waiting on. */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ -- cgit v1.2.3 From 7db74df88b52844f4e966901e2972bba725e6766 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 03:03:37 -0700 Subject: rcu: Move rcu_barrier_completion to rcu_state structure In order to allow each RCU flavor to concurrently execute its rcu_barrier() function, it is necessary to move the relevant state to the rcu_state structure. This commit therefore moves the rcu_barrier_completion global variable to a new ->barrier_completion field in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 9 ++++----- kernel/rcutree.h | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 5929b021666d..ca7d1678ac79 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -158,7 +158,6 @@ unsigned long rcutorture_vernum; /* State information for rcu_barrier() and friends. */ static DEFINE_MUTEX(rcu_barrier_mutex); -static struct completion rcu_barrier_completion; /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s @@ -2275,7 +2274,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp) struct rcu_state *rsp = rdp->rsp; if (atomic_dec_and_test(&rsp->barrier_cpu_count)) - complete(&rcu_barrier_completion); + complete(&rsp->barrier_completion); } /* @@ -2325,7 +2324,7 @@ static void _rcu_barrier(struct rcu_state *rsp) * 6. Both rcu_barrier_callback() callbacks are invoked, awakening * us -- but before CPU 1's orphaned callbacks are invoked!!! */ - init_completion(&rcu_barrier_completion); + init_completion(&rsp->barrier_completion); atomic_set(&rsp->barrier_cpu_count, 1); raw_spin_lock_irqsave(&rsp->onofflock, flags); rsp->rcu_barrier_in_progress = current; @@ -2375,10 +2374,10 @@ static void _rcu_barrier(struct rcu_state *rsp) * CPU, and thus each counted, remove the initial count. */ if (atomic_dec_and_test(&rsp->barrier_cpu_count)) - complete(&rcu_barrier_completion); + complete(&rsp->barrier_completion); /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ - wait_for_completion(&rcu_barrier_completion); + wait_for_completion(&rsp->barrier_completion); /* Other rcu_barrier() invocations can now safely proceed. */ mutex_unlock(&rcu_barrier_mutex); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index c57ef0b7f097..d1ca4424122b 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -401,6 +401,7 @@ struct rcu_state { /* Task doing rcu_barrier(), */ /* or NULL if no barrier. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ + struct completion barrier_completion; /* Wake at barrier end. */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ -- cgit v1.2.3 From 7be7f0be907224445acc62b3884c892f38b7ff40 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 05:18:53 -0700 Subject: rcu: Move rcu_barrier_mutex to rcu_state structure In order to allow each RCU flavor to concurrently execute its rcu_barrier() function, it is necessary to move the relevant state to the rcu_state structure. This commit therefore moves the rcu_barrier_mutex global variable to a new ->barrier_mutex field in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 9 +++------ kernel/rcutree.h | 1 + 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ca7d1678ac79..ff992ac24e73 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -71,6 +71,7 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \ .orphan_nxttail = &sname##_state.orphan_nxtlist, \ .orphan_donetail = &sname##_state.orphan_donelist, \ + .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ @@ -155,10 +156,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); unsigned long rcutorture_testseq; unsigned long rcutorture_vernum; -/* State information for rcu_barrier() and friends. */ - -static DEFINE_MUTEX(rcu_barrier_mutex); - /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -2303,7 +2300,7 @@ static void _rcu_barrier(struct rcu_state *rsp) init_rcu_head_on_stack(&rd.barrier_head); /* Take mutex to serialize concurrent rcu_barrier() requests. */ - mutex_lock(&rcu_barrier_mutex); + mutex_lock(&rsp->barrier_mutex); smp_mb(); /* Prevent any prior operations from leaking in. */ @@ -2380,7 +2377,7 @@ static void _rcu_barrier(struct rcu_state *rsp) wait_for_completion(&rsp->barrier_completion); /* Other rcu_barrier() invocations can now safely proceed. */ - mutex_unlock(&rcu_barrier_mutex); + mutex_unlock(&rsp->barrier_mutex); destroy_rcu_head_on_stack(&rd.barrier_head); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index d1ca4424122b..7641aec3e59c 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -400,6 +400,7 @@ struct rcu_state { struct task_struct *rcu_barrier_in_progress; /* Task doing rcu_barrier(), */ /* or NULL if no barrier. */ + struct mutex barrier_mutex; /* Guards barrier fields. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ struct completion barrier_completion; /* Wake at barrier end. */ raw_spinlock_t fqslock; /* Only one task forcing */ -- cgit v1.2.3 From cfed0a85dad921c683e9c0d25b072bcc5745ede0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 15 Jun 2012 18:22:05 -0700 Subject: rcu: Remove needless initialization For global variables, C defaults all fields to zero. The initialization of the rcu_state structure's ->n_force_qs and ->n_force_qs_ngp fields is therefore redundant, so this commit removes these initializations. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ff992ac24e73..44a8fda9be86 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -73,8 +73,6 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; .orphan_donetail = &sname##_state.orphan_donelist, \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ - .n_force_qs = 0, \ - .n_force_qs_ngp = 0, \ .name = #sname, \ } -- cgit v1.2.3 From cf3a9c4842b1e097dbe0854933c471d43dd24f69 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 14:56:46 -0700 Subject: rcu: Increase rcu_barrier() concurrency The traditional rcu_barrier() implementation has serialized all requests, regardless of RCU flavor, and also does not coalesce concurrent requests. In the past, this has been good and sufficient. However, systems are getting larger and use of rcu_barrier() has been increasing. This commit therefore introduces a counter-based scheme that allows _rcu_barrier() calls for the same flavor of RCU to take advantage of each others' work. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 36 +++++++++++++++++++++++++++++++++++- kernel/rcutree.h | 2 ++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 44a8fda9be86..6bb5d562253f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2294,13 +2294,41 @@ static void _rcu_barrier(struct rcu_state *rsp) unsigned long flags; struct rcu_data *rdp; struct rcu_data rd; + unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); + unsigned long snap_done; init_rcu_head_on_stack(&rd.barrier_head); /* Take mutex to serialize concurrent rcu_barrier() requests. */ mutex_lock(&rsp->barrier_mutex); - smp_mb(); /* Prevent any prior operations from leaking in. */ + /* + * Ensure that all prior references, including to ->n_barrier_done, + * are ordered before the _rcu_barrier() machinery. + */ + smp_mb(); /* See above block comment. */ + + /* + * Recheck ->n_barrier_done to see if others did our work for us. + * This means checking ->n_barrier_done for an even-to-odd-to-even + * transition. The "if" expression below therefore rounds the old + * value up to the next even number and adds two before comparing. + */ + snap_done = ACCESS_ONCE(rsp->n_barrier_done); + if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { + smp_mb(); /* caller's subsequent code after above check. */ + mutex_unlock(&rsp->barrier_mutex); + return; + } + + /* + * Increment ->n_barrier_done to avoid duplicate work. Use + * ACCESS_ONCE() to prevent the compiler from speculating + * the increment to precede the early-exit check. + */ + ACCESS_ONCE(rsp->n_barrier_done)++; + WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); + smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ /* * Initialize the count to one rather than to zero in order to @@ -2371,6 +2399,12 @@ static void _rcu_barrier(struct rcu_state *rsp) if (atomic_dec_and_test(&rsp->barrier_cpu_count)) complete(&rsp->barrier_completion); + /* Increment ->n_barrier_done to prevent duplicate work. */ + smp_mb(); /* Keep increment after above mechanism. */ + ACCESS_ONCE(rsp->n_barrier_done)++; + WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); + smp_mb(); /* Keep increment before caller's subsequent code. */ + /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ wait_for_completion(&rsp->barrier_completion); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7641aec3e59c..be10286ad380 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -403,6 +403,8 @@ struct rcu_state { struct mutex barrier_mutex; /* Guards barrier fields. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ struct completion barrier_completion; /* Wake at barrier end. */ + unsigned long n_barrier_done; /* ++ at start and end of */ + /* _rcu_barrier(). */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ -- cgit v1.2.3 From a83eff0a82a7f3f14fea477fd41e6c082e7fc96a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 May 2012 18:47:05 -0700 Subject: rcu: Add tracing for _rcu_barrier() This commit adds event tracing for _rcu_barrier() execution. This is defined only if RCU_TRACE=y. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/trace/events/rcu.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ kernel/rcutree.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d274734b2aa4..5bde94d8585b 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -541,6 +541,50 @@ TRACE_EVENT(rcu_torture_read, __entry->rcutorturename, __entry->rhp) ); +/* + * Tracepoint for _rcu_barrier() execution. The string "s" describes + * the _rcu_barrier phase: + * "Begin": rcu_barrier_callback() started. + * "Check": rcu_barrier_callback() checking for piggybacking. + * "EarlyExit": rcu_barrier_callback() piggybacked, thus early exit. + * "Inc1": rcu_barrier_callback() piggyback check counter incremented. + * "Offline": rcu_barrier_callback() found offline CPU + * "OnlineQ": rcu_barrier_callback() found online CPU with callbacks. + * "OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks. + * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. + * "CB": An rcu_barrier_callback() invoked a callback, not the last. + * "LastCB": An rcu_barrier_callback() invoked the last callback. + * "Inc2": rcu_barrier_callback() piggyback check counter incremented. + * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument + * is the count of remaining callbacks, and "done" is the piggybacking count. + */ +TRACE_EVENT(rcu_barrier, + + TP_PROTO(char *rcuname, char *s, int cpu, int cnt, unsigned long done), + + TP_ARGS(rcuname, s, cpu, cnt, done), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(char *, s) + __field(int, cpu) + __field(int, cnt) + __field(unsigned long, done) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->s = s; + __entry->cpu = cpu; + __entry->cnt = cnt; + __entry->done = done; + ), + + TP_printk("%s %s cpu %d remaining %d # %lu", + __entry->rcuname, __entry->s, __entry->cpu, __entry->cnt, + __entry->done) +); + #else /* #ifdef CONFIG_RCU_TRACE */ #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) @@ -564,6 +608,7 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ do { } while (0) #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 6bb5d562253f..dda43d826504 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2259,6 +2259,17 @@ static int rcu_cpu_has_callbacks(int cpu) rcu_preempt_cpu_has_callbacks(cpu); } +/* + * Helper function for _rcu_barrier() tracing. If tracing is disabled, + * the compiler is expected to optimize this away. + */ +static void _rcu_barrier_trace(struct rcu_state *rsp, char *s, + int cpu, unsigned long done) +{ + trace_rcu_barrier(rsp->name, s, cpu, + atomic_read(&rsp->barrier_cpu_count), done); +} + /* * RCU callback function for _rcu_barrier(). If we are last, wake * up the task executing _rcu_barrier(). @@ -2268,8 +2279,12 @@ static void rcu_barrier_callback(struct rcu_head *rhp) struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); struct rcu_state *rsp = rdp->rsp; - if (atomic_dec_and_test(&rsp->barrier_cpu_count)) + if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { + _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done); complete(&rsp->barrier_completion); + } else { + _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done); + } } /* @@ -2280,6 +2295,7 @@ static void rcu_barrier_func(void *type) struct rcu_state *rsp = type; struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); + _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); atomic_inc(&rsp->barrier_cpu_count); rsp->call(&rdp->barrier_head, rcu_barrier_callback); } @@ -2298,6 +2314,7 @@ static void _rcu_barrier(struct rcu_state *rsp) unsigned long snap_done; init_rcu_head_on_stack(&rd.barrier_head); + _rcu_barrier_trace(rsp, "Begin", -1, snap); /* Take mutex to serialize concurrent rcu_barrier() requests. */ mutex_lock(&rsp->barrier_mutex); @@ -2315,7 +2332,9 @@ static void _rcu_barrier(struct rcu_state *rsp) * value up to the next even number and adds two before comparing. */ snap_done = ACCESS_ONCE(rsp->n_barrier_done); + _rcu_barrier_trace(rsp, "Check", -1, snap_done); if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { + _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done); smp_mb(); /* caller's subsequent code after above check. */ mutex_unlock(&rsp->barrier_mutex); return; @@ -2328,6 +2347,7 @@ static void _rcu_barrier(struct rcu_state *rsp) */ ACCESS_ONCE(rsp->n_barrier_done)++; WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); + _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done); smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ /* @@ -2364,13 +2384,19 @@ static void _rcu_barrier(struct rcu_state *rsp) preempt_disable(); rdp = per_cpu_ptr(rsp->rda, cpu); if (cpu_is_offline(cpu)) { + _rcu_barrier_trace(rsp, "Offline", cpu, + rsp->n_barrier_done); preempt_enable(); while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) schedule_timeout_interruptible(1); } else if (ACCESS_ONCE(rdp->qlen)) { + _rcu_barrier_trace(rsp, "OnlineQ", cpu, + rsp->n_barrier_done); smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); preempt_enable(); } else { + _rcu_barrier_trace(rsp, "OnlineNQ", cpu, + rsp->n_barrier_done); preempt_enable(); } } @@ -2403,6 +2429,7 @@ static void _rcu_barrier(struct rcu_state *rsp) smp_mb(); /* Keep increment after above mechanism. */ ACCESS_ONCE(rsp->n_barrier_done)++; WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); + _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done); smp_mb(); /* Keep increment before caller's subsequent code. */ /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ -- cgit v1.2.3 From d7e187c8e9f30543f9cadfed094896ff414acb8f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 25 May 2012 11:49:15 -0700 Subject: rcu: Add rcu_barrier() statistics to debugfs tracing This commit adds an rcubarrier file to RCU's debugfs statistical tracing directory, providing diagnostic information on rcu_barrier(). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree_trace.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index a3556a2d842c..057408be6c3b 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -46,6 +46,40 @@ #define RCU_TREE_NONCORE #include "rcutree.h" +static void print_rcubarrier(struct seq_file *m, struct rcu_state *rsp) +{ + seq_printf(m, "%c bcc: %d nbd: %lu\n", + rsp->rcu_barrier_in_progress ? 'B' : '.', + atomic_read(&rsp->barrier_cpu_count), + rsp->n_barrier_done); +} + +static int show_rcubarrier(struct seq_file *m, void *unused) +{ +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_puts(m, "rcu_preempt: "); + print_rcubarrier(m, &rcu_preempt_state); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + seq_puts(m, "rcu_sched: "); + print_rcubarrier(m, &rcu_sched_state); + seq_puts(m, "rcu_bh: "); + print_rcubarrier(m, &rcu_bh_state); + return 0; +} + +static int rcubarrier_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcubarrier, NULL); +} + +static const struct file_operations rcubarrier_fops = { + .owner = THIS_MODULE, + .open = rcubarrier_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + #ifdef CONFIG_RCU_BOOST static char convert_kthread_status(unsigned int kthread_status) @@ -453,6 +487,11 @@ static int __init rcutree_trace_init(void) if (!rcudir) goto free_out; + retval = debugfs_create_file("rcubarrier", 0444, rcudir, + NULL, &rcubarrier_fops); + if (!retval) + goto free_out; + retval = debugfs_create_file("rcudata", 0444, rcudir, NULL, &rcudata_fops); if (!retval) -- cgit v1.2.3 From 1bca8cf1a2c3c6683b12ad28a3e826ca7a834978 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jun 2012 09:40:38 -0700 Subject: rcu: Remove unneeded __rcu_process_callbacks() argument With the advent of __this_cpu_ptr(), it is no longer necessary to pass both the rcu_state and rcu_data structures into __rcu_process_callbacks(). This commit therefore computes the rcu_data pointer from the rcu_state pointer within __rcu_process_callbacks() so that callers can pass in only the pointer to the rcu_state structure. This paves the way for linking the rcu_state structures together and iterating over them. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 8 ++++---- kernel/rcutree_plugin.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index dda43d826504..5376a156be8a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1788,9 +1788,10 @@ unlock_fqs_ret: * whom the rdp belongs. */ static void -__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) +__rcu_process_callbacks(struct rcu_state *rsp) { unsigned long flags; + struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); WARN_ON_ONCE(rdp->beenonline == 0); @@ -1827,9 +1828,8 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) static void rcu_process_callbacks(struct softirq_action *unused) { trace_rcu_utilization("Start RCU core"); - __rcu_process_callbacks(&rcu_sched_state, - &__get_cpu_var(rcu_sched_data)); - __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + __rcu_process_callbacks(&rcu_sched_state); + __rcu_process_callbacks(&rcu_bh_state); rcu_preempt_process_callbacks(); trace_rcu_utilization("End RCU core"); } diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 9cb3a68819fa..5a80cdd9a0a3 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -687,8 +687,7 @@ static void rcu_preempt_check_callbacks(int cpu) */ static void rcu_preempt_process_callbacks(void) { - __rcu_process_callbacks(&rcu_preempt_state, - &__get_cpu_var(rcu_preempt_data)); + __rcu_process_callbacks(&rcu_preempt_state); } #ifdef CONFIG_RCU_BOOST -- cgit v1.2.3 From 6ce75a2326e6f8b3bdfb60e1de7934b89858e87b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jun 2012 11:01:13 -0700 Subject: rcu: Introduce for_each_rcu_flavor() and use it The arrival of TREE_PREEMPT_RCU some years back included some ugly code involving either #ifdef or #ifdef'ed wrapper functions to iterate over all non-SRCU flavors of RCU. This commit therefore introduces a for_each_rcu_flavor() iterator over the rcu_state structures for each flavor of RCU to clean up a bit of the ugliness. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 53 +++++++++++++--------- kernel/rcutree.h | 12 +++-- kernel/rcutree_plugin.h | 116 ------------------------------------------------ 3 files changed, 37 insertions(+), 144 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 5376a156be8a..b61c3ffc80e9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -84,6 +84,7 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; +LIST_HEAD(rcu_struct_flavors); /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; @@ -860,9 +861,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) */ void rcu_cpu_stall_reset(void) { - rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; - rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; - rcu_preempt_stall_reset(); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + rsp->jiffies_stall = jiffies + ULONG_MAX / 2; } static struct notifier_block rcu_panic_block = { @@ -1827,10 +1829,11 @@ __rcu_process_callbacks(struct rcu_state *rsp) */ static void rcu_process_callbacks(struct softirq_action *unused) { + struct rcu_state *rsp; + trace_rcu_utilization("Start RCU core"); - __rcu_process_callbacks(&rcu_sched_state); - __rcu_process_callbacks(&rcu_bh_state); - rcu_preempt_process_callbacks(); + for_each_rcu_flavor(rsp) + __rcu_process_callbacks(rsp); trace_rcu_utilization("End RCU core"); } @@ -2241,9 +2244,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) */ static int rcu_pending(int cpu) { - return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || - __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || - rcu_preempt_pending(cpu); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu))) + return 1; + return 0; } /* @@ -2253,10 +2259,13 @@ static int rcu_pending(int cpu) */ static int rcu_cpu_has_callbacks(int cpu) { + struct rcu_state *rsp; + /* RCU callbacks either ready or pending? */ - return per_cpu(rcu_sched_data, cpu).nxtlist || - per_cpu(rcu_bh_data, cpu).nxtlist || - rcu_preempt_cpu_has_callbacks(cpu); + for_each_rcu_flavor(rsp) + if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) + return 1; + return 0; } /* @@ -2551,9 +2560,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) static void __cpuinit rcu_prepare_cpu(int cpu) { - rcu_init_percpu_data(cpu, &rcu_sched_state, 0); - rcu_init_percpu_data(cpu, &rcu_bh_state, 0); - rcu_preempt_init_percpu_data(cpu); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + rcu_init_percpu_data(cpu, rsp, + strcmp(rsp->name, "rcu_preempt") == 0); } /* @@ -2565,6 +2576,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, long cpu = (long)hcpu; struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; + struct rcu_state *rsp; trace_rcu_utilization("Start CPU hotplug"); switch (action) { @@ -2589,18 +2601,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, * touch any data without introducing corruption. We send the * dying CPU's callbacks to an arbitrarily chosen online CPU. */ - rcu_cleanup_dying_cpu(&rcu_bh_state); - rcu_cleanup_dying_cpu(&rcu_sched_state); - rcu_preempt_cleanup_dying_cpu(); + for_each_rcu_flavor(rsp) + rcu_cleanup_dying_cpu(rsp); rcu_cleanup_after_idle(cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: - rcu_cleanup_dead_cpu(cpu, &rcu_bh_state); - rcu_cleanup_dead_cpu(cpu, &rcu_sched_state); - rcu_preempt_cleanup_dead_cpu(cpu); + for_each_rcu_flavor(rsp) + rcu_cleanup_dead_cpu(cpu, rsp); break; default: break; @@ -2717,6 +2727,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, per_cpu_ptr(rsp->rda, i)->mynode = rnp; rcu_boot_init_percpu_data(i, rsp); } + list_add(&rsp->flavors, &rcu_struct_flavors); } /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index be10286ad380..b92c4550a6e6 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -422,8 +422,13 @@ struct rcu_state { unsigned long gp_max; /* Maximum GP duration in */ /* jiffies. */ char *name; /* Name of structure. */ + struct list_head flavors; /* List of RCU flavors. */ }; +extern struct list_head rcu_struct_flavors; +#define for_each_rcu_flavor(rsp) \ + list_for_each_entry((rsp), &rcu_struct_flavors, flavors) + /* Return values for rcu_preempt_offline_tasks(). */ #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ @@ -466,25 +471,18 @@ static void rcu_stop_cpu_kthread(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); -static void rcu_preempt_stall_reset(void); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static int rcu_preempt_offline_tasks(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_preempt_cleanup_dead_cpu(int cpu); static void rcu_preempt_check_callbacks(int cpu); -static void rcu_preempt_process_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake); #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ -static int rcu_preempt_pending(int cpu); -static int rcu_preempt_cpu_has_callbacks(int cpu); -static void __cpuinit rcu_preempt_init_percpu_data(int cpu); -static void rcu_preempt_cleanup_dying_cpu(void); static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 5a80cdd9a0a3..d18b4d383afe 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -544,16 +544,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return ndetected; } -/* - * Suppress preemptible RCU's CPU stall warnings by pushing the - * time of the next stall-warning message comfortably far into the - * future. - */ -static void rcu_preempt_stall_reset(void) -{ - rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; -} - /* * Check that the list of blocked tasks for the newly completed grace * period is in fact empty. It is a serious bug to complete a grace @@ -654,14 +644,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -/* - * Do CPU-offline processing for preemptible RCU. - */ -static void rcu_preempt_cleanup_dead_cpu(int cpu) -{ - rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state); -} - /* * Check for a quiescent state from the current CPU. When a task blocks, * the task is recorded in the corresponding CPU's rcu_node structure, @@ -682,14 +664,6 @@ static void rcu_preempt_check_callbacks(int cpu) t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; } -/* - * Process callbacks for preemptible RCU. - */ -static void rcu_preempt_process_callbacks(void) -{ - __rcu_process_callbacks(&rcu_preempt_state); -} - #ifdef CONFIG_RCU_BOOST static void rcu_preempt_do_callbacks(void) @@ -921,24 +895,6 @@ mb_ret: } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -/* - * Check to see if there is any immediate preemptible-RCU-related work - * to be done. - */ -static int rcu_preempt_pending(int cpu) -{ - return __rcu_pending(&rcu_preempt_state, - &per_cpu(rcu_preempt_data, cpu)); -} - -/* - * Does preemptible RCU have callbacks on this CPU? - */ -static int rcu_preempt_cpu_has_callbacks(int cpu) -{ - return !!per_cpu(rcu_preempt_data, cpu).nxtlist; -} - /** * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. */ @@ -948,23 +904,6 @@ void rcu_barrier(void) } EXPORT_SYMBOL_GPL(rcu_barrier); -/* - * Initialize preemptible RCU's per-CPU data. - */ -static void __cpuinit rcu_preempt_init_percpu_data(int cpu) -{ - rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); -} - -/* - * Move preemptible RCU's callbacks from dying CPU to other online CPU - * and record a quiescent state. - */ -static void rcu_preempt_cleanup_dying_cpu(void) -{ - rcu_cleanup_dying_cpu(&rcu_preempt_state); -} - /* * Initialize preemptible RCU's state structures. */ @@ -1049,14 +988,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return 0; } -/* - * Because preemptible RCU does not exist, there is no need to suppress - * its CPU stall warnings. - */ -static void rcu_preempt_stall_reset(void) -{ -} - /* * Because there is no preemptible RCU, there can be no readers blocked, * so there is no need to check for blocked tasks. So check only for @@ -1084,14 +1015,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -/* - * Because preemptible RCU does not exist, it never needs CPU-offline - * processing. - */ -static void rcu_preempt_cleanup_dead_cpu(int cpu) -{ -} - /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -1100,14 +1023,6 @@ static void rcu_preempt_check_callbacks(int cpu) { } -/* - * Because preemptible RCU does not exist, it never has any callbacks - * to process. - */ -static void rcu_preempt_process_callbacks(void) -{ -} - /* * Queue an RCU callback for lazy invocation after a grace period. * This will likely be later named something like "call_rcu_lazy()", @@ -1148,22 +1063,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -/* - * Because preemptible RCU does not exist, it never has any work to do. - */ -static int rcu_preempt_pending(int cpu) -{ - return 0; -} - -/* - * Because preemptible RCU does not exist, it never has callbacks - */ -static int rcu_preempt_cpu_has_callbacks(int cpu) -{ - return 0; -} - /* * Because preemptible RCU does not exist, rcu_barrier() is just * another name for rcu_barrier_sched(). @@ -1174,21 +1073,6 @@ void rcu_barrier(void) } EXPORT_SYMBOL_GPL(rcu_barrier); -/* - * Because preemptible RCU does not exist, there is no per-CPU - * data to initialize. - */ -static void __cpuinit rcu_preempt_init_percpu_data(int cpu) -{ -} - -/* - * Because there is no preemptible RCU, there is no cleanup to do. - */ -static void rcu_preempt_cleanup_dying_cpu(void) -{ -} - /* * Because preemptible RCU does not exist, it need not be initialized. */ -- cgit v1.2.3 From c0cc962da3e7770feb3665f087ea3e23d8c15479 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jun 2012 12:25:39 -0700 Subject: rcu: Use for_each_rcu_flavor() in TREE_RCU tracing This commit applies the new for_each_rcu_flavor() macro to the kernel/rcutree_trace.c file. Signed-off-by: Paul E. McKenney --- kernel/rcutree_trace.c | 116 ++++++++++++++++++------------------------------- 1 file changed, 43 insertions(+), 73 deletions(-) diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 057408be6c3b..a16ddbd6fdc4 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -46,24 +46,15 @@ #define RCU_TREE_NONCORE #include "rcutree.h" -static void print_rcubarrier(struct seq_file *m, struct rcu_state *rsp) -{ - seq_printf(m, "%c bcc: %d nbd: %lu\n", - rsp->rcu_barrier_in_progress ? 'B' : '.', - atomic_read(&rsp->barrier_cpu_count), - rsp->n_barrier_done); -} - static int show_rcubarrier(struct seq_file *m, void *unused) { -#ifdef CONFIG_TREE_PREEMPT_RCU - seq_puts(m, "rcu_preempt: "); - print_rcubarrier(m, &rcu_preempt_state); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - seq_puts(m, "rcu_sched: "); - print_rcubarrier(m, &rcu_sched_state); - seq_puts(m, "rcu_bh: "); - print_rcubarrier(m, &rcu_bh_state); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + seq_printf(m, "%s: %c bcc: %d nbd: %lu\n", + rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.', + atomic_read(&rsp->barrier_cpu_count), + rsp->n_barrier_done); return 0; } @@ -129,24 +120,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); } -#define PRINT_RCU_DATA(name, func, m) \ - do { \ - int _p_r_d_i; \ - \ - for_each_possible_cpu(_p_r_d_i) \ - func(m, &per_cpu(name, _p_r_d_i)); \ - } while (0) - static int show_rcudata(struct seq_file *m, void *unused) { -#ifdef CONFIG_TREE_PREEMPT_RCU - seq_puts(m, "rcu_preempt:\n"); - PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - seq_puts(m, "rcu_sched:\n"); - PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); - seq_puts(m, "rcu_bh:\n"); - PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m); + int cpu; + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + seq_printf(m, "%s:\n", rsp->name); + for_each_possible_cpu(cpu) + print_one_rcu_data(m, per_cpu_ptr(rsp->rda, cpu)); + } return 0; } @@ -200,6 +183,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) static int show_rcudata_csv(struct seq_file *m, void *unused) { + int cpu; + struct rcu_state *rsp; + seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\""); @@ -207,14 +193,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) seq_puts(m, "\"kt\",\"ktl\""); #endif /* #ifdef CONFIG_RCU_BOOST */ seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); -#ifdef CONFIG_TREE_PREEMPT_RCU - seq_puts(m, "\"rcu_preempt:\"\n"); - PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - seq_puts(m, "\"rcu_sched:\"\n"); - PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m); - seq_puts(m, "\"rcu_bh:\"\n"); - PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m); + for_each_rcu_flavor(rsp) { + seq_printf(m, "\"%s:\"\n", rsp->name); + for_each_possible_cpu(cpu) + print_one_rcu_data_csv(m, per_cpu_ptr(rsp->rda, cpu)); + } return 0; } @@ -304,9 +287,9 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) struct rcu_node *rnp; gpnum = rsp->gpnum; - seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " + seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x " "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", - rsp->completed, gpnum, rsp->fqs_state, + rsp->name, rsp->completed, gpnum, rsp->fqs_state, (long)(rsp->jiffies_force_qs - jiffies), (int)(jiffies & 0xffff), rsp->n_force_qs, rsp->n_force_qs_ngp, @@ -329,14 +312,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) static int show_rcuhier(struct seq_file *m, void *unused) { -#ifdef CONFIG_TREE_PREEMPT_RCU - seq_puts(m, "rcu_preempt:\n"); - print_one_rcu_state(m, &rcu_preempt_state); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - seq_puts(m, "rcu_sched:\n"); - print_one_rcu_state(m, &rcu_sched_state); - seq_puts(m, "rcu_bh:\n"); - print_one_rcu_state(m, &rcu_bh_state); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + print_one_rcu_state(m, rsp); return 0; } @@ -377,11 +356,10 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp) static int show_rcugp(struct seq_file *m, void *unused) { -#ifdef CONFIG_TREE_PREEMPT_RCU - show_one_rcugp(m, &rcu_preempt_state); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - show_one_rcugp(m, &rcu_sched_state); - show_one_rcugp(m, &rcu_bh_state); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + show_one_rcugp(m, rsp); return 0; } @@ -416,28 +394,20 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) rdp->n_rp_need_nothing); } -static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) +static int show_rcu_pending(struct seq_file *m, void *unused) { int cpu; struct rcu_data *rdp; - - for_each_possible_cpu(cpu) { - rdp = per_cpu_ptr(rsp->rda, cpu); - if (rdp->beenonline) - print_one_rcu_pending(m, rdp); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + seq_printf(m, "%s:\n", rsp->name); + for_each_possible_cpu(cpu) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (rdp->beenonline) + print_one_rcu_pending(m, rdp); + } } -} - -static int show_rcu_pending(struct seq_file *m, void *unused) -{ -#ifdef CONFIG_TREE_PREEMPT_RCU - seq_puts(m, "rcu_preempt:\n"); - print_rcu_pendings(m, &rcu_preempt_state); -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - seq_puts(m, "rcu_sched:\n"); - print_rcu_pendings(m, &rcu_sched_state); - seq_puts(m, "rcu_bh:\n"); - print_rcu_pendings(m, &rcu_bh_state); return 0; } -- cgit v1.2.3 From ff015030c939f0bec68fa9b8898da3aaa7fe55ea Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jun 2012 13:16:58 -0700 Subject: rcu: RCU_SAVE_DYNTICK code no longer ever dead Before RCU had unified idle, the RCU_SAVE_DYNTICK leg of the switch statement in force_quiescent_state() was dead code for CONFIG_NO_HZ=n kernel builds. With unified idle, the code is never dead. This commit therefore removes the "if" statement designed to make gcc aware of when the code was and was not dead. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b61c3ffc80e9..967b4bed2cf3 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1747,8 +1747,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: - if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) - break; /* So gcc recognizes the dead code. */ raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ -- cgit v1.2.3 From 74d874e7bdc5b09cedcc7da0de44db25888eea10 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 7 May 2012 13:43:30 -0700 Subject: rcu: Update documentation to cover call_srcu() and srcu_barrier(). The advent of call_srcu() and srcu_barrier() obsoleted some of the documentation, so this commit brings that up to date. Signed-off-by: Paul E. McKenney --- Documentation/RCU/checklist.txt | 39 ++++++++++++++++++++------------------- Documentation/RCU/rcubarrier.txt | 15 ++++----------- Documentation/RCU/torture.txt | 9 +++++++++ Documentation/RCU/whatisRCU.txt | 6 +++--- 4 files changed, 36 insertions(+), 33 deletions(-) diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 5c8d74968090..fc103d7a0474 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -162,9 +162,9 @@ over a rather long period of time, but improvements are always welcome! when publicizing a pointer to a structure that can be traversed by an RCU read-side critical section. -5. If call_rcu(), or a related primitive such as call_rcu_bh() or - call_rcu_sched(), is used, the callback function must be - written to be called from softirq context. In particular, +5. If call_rcu(), or a related primitive such as call_rcu_bh(), + call_rcu_sched(), or call_srcu() is used, the callback function + must be written to be called from softirq context. In particular, it cannot block. 6. Since synchronize_rcu() can block, it cannot be called from @@ -202,11 +202,12 @@ over a rather long period of time, but improvements are always welcome! updater uses call_rcu_sched() or synchronize_sched(), then the corresponding readers must disable preemption, possibly by calling rcu_read_lock_sched() and rcu_read_unlock_sched(). - If the updater uses synchronize_srcu(), the the corresponding - readers must use srcu_read_lock() and srcu_read_unlock(), - and with the same srcu_struct. The rules for the expedited - primitives are the same as for their non-expedited counterparts. - Mixing things up will result in confusion and broken kernels. + If the updater uses synchronize_srcu() or call_srcu(), + the the corresponding readers must use srcu_read_lock() and + srcu_read_unlock(), and with the same srcu_struct. The rules for + the expedited primitives are the same as for their non-expedited + counterparts. Mixing things up will result in confusion and + broken kernels. One exception to this rule: rcu_read_lock() and rcu_read_unlock() may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() @@ -333,14 +334,14 @@ over a rather long period of time, but improvements are always welcome! victim CPU from ever going offline.) 14. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(), - synchronize_srcu(), and synchronize_srcu_expedited()) may only - be invoked from process context. Unlike other forms of RCU, it - -is- permissible to block in an SRCU read-side critical section - (demarked by srcu_read_lock() and srcu_read_unlock()), hence the - "SRCU": "sleepable RCU". Please note that if you don't need - to sleep in read-side critical sections, you should be using - RCU rather than SRCU, because RCU is almost always faster and - easier to use than is SRCU. + synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu()) + may only be invoked from process context. Unlike other forms of + RCU, it -is- permissible to block in an SRCU read-side critical + section (demarked by srcu_read_lock() and srcu_read_unlock()), + hence the "SRCU": "sleepable RCU". Please note that if you + don't need to sleep in read-side critical sections, you should be + using RCU rather than SRCU, because RCU is almost always faster + and easier to use than is SRCU. If you need to enter your read-side critical section in a hardirq or exception handler, and then exit that same read-side @@ -353,8 +354,8 @@ over a rather long period of time, but improvements are always welcome! cleanup_srcu_struct(). These are passed a "struct srcu_struct" that defines the scope of a given SRCU domain. Once initialized, the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock() - synchronize_srcu(), and synchronize_srcu_expedited(). A given - synchronize_srcu() waits only for SRCU read-side critical + synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu(). + A given synchronize_srcu() waits only for SRCU read-side critical sections governed by srcu_read_lock() and srcu_read_unlock() calls that have been passed the same srcu_struct. This property is what makes sleeping read-side critical sections tolerable -- @@ -374,7 +375,7 @@ over a rather long period of time, but improvements are always welcome! requiring SRCU's read-side deadlock immunity or low read-side realtime latency. - Note that, rcu_assign_pointer() relates to SRCU just as they do + Note that, rcu_assign_pointer() relates to SRCU just as it does to other forms of RCU. 15. The whole point of call_rcu(), synchronize_rcu(), and friends diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt index e439a0edee22..38428c125135 100644 --- a/Documentation/RCU/rcubarrier.txt +++ b/Documentation/RCU/rcubarrier.txt @@ -79,8 +79,6 @@ complete. Pseudo-code using rcu_barrier() is as follows: 2. Execute rcu_barrier(). 3. Allow the module to be unloaded. -Quick Quiz #1: Why is there no srcu_barrier()? - The rcutorture module makes use of rcu_barrier in its exit function as follows: @@ -162,7 +160,7 @@ for any pre-existing callbacks to complete. Then lines 55-62 print status and do operation-specific cleanup, and then return, permitting the module-unload operation to be completed. -Quick Quiz #2: Is there any other situation where rcu_barrier() might +Quick Quiz #1: Is there any other situation where rcu_barrier() might be required? Your module might have additional complications. For example, if your @@ -242,7 +240,7 @@ reaches zero, as follows: 4 complete(&rcu_barrier_completion); 5 } -Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes +Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes immediately (thus incrementing rcu_barrier_cpu_count to the value one), but the other CPU's rcu_barrier_func() invocations are delayed for a full grace period? Couldn't this result in @@ -259,12 +257,7 @@ so that your module may be safely unloaded. Answers to Quick Quizzes -Quick Quiz #1: Why is there no srcu_barrier()? - -Answer: Since there is no call_srcu(), there can be no outstanding SRCU - callbacks. Therefore, there is no need to wait for them. - -Quick Quiz #2: Is there any other situation where rcu_barrier() might +Quick Quiz #1: Is there any other situation where rcu_barrier() might be required? Answer: Interestingly enough, rcu_barrier() was not originally @@ -278,7 +271,7 @@ Answer: Interestingly enough, rcu_barrier() was not originally implementing rcutorture, and found that rcu_barrier() solves this problem as well. -Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes +Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes immediately (thus incrementing rcu_barrier_cpu_count to the value one), but the other CPU's rcu_barrier_func() invocations are delayed for a full grace period? Couldn't this result in diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 4ddf3913fd8c..7dce8a17eac2 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt @@ -174,11 +174,20 @@ torture_type The type of RCU to test, with string values as follows: and synchronize_rcu_bh_expedited(). "srcu": srcu_read_lock(), srcu_read_unlock() and + call_srcu(). + + "srcu_sync": srcu_read_lock(), srcu_read_unlock() and synchronize_srcu(). "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and synchronize_srcu_expedited(). + "srcu_raw": srcu_read_lock_raw(), srcu_read_unlock_raw(), + and call_srcu(). + + "srcu_raw_sync": srcu_read_lock_raw(), srcu_read_unlock_raw(), + and synchronize_srcu(). + "sched": preempt_disable(), preempt_enable(), and call_rcu_sched(). diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 6bbe8dcdc3da..69ee188515e7 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -833,9 +833,9 @@ sched: Critical sections Grace period Barrier SRCU: Critical sections Grace period Barrier - srcu_read_lock synchronize_srcu N/A - srcu_read_unlock synchronize_srcu_expedited - srcu_read_lock_raw + srcu_read_lock synchronize_srcu srcu_barrier + srcu_read_unlock call_srcu + srcu_read_lock_raw synchronize_srcu_expedited srcu_read_unlock_raw srcu_dereference -- cgit v1.2.3 From 751a68b2e46fe11a42450cb55a16e8065eddec7e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 7 May 2012 13:44:44 -0700 Subject: rcu: Rationalize ordering of torture_ops list Move the raw SRCU interfaces out of the middle of the normal SRCU interfaces. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutorture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index e66b34ab7555..9850479f319d 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -1908,8 +1908,8 @@ rcu_torture_init(void) static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, - &srcu_ops, &srcu_sync_ops, &srcu_raw_ops, - &srcu_raw_sync_ops, &srcu_expedited_ops, + &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops, + &srcu_raw_ops, &srcu_raw_sync_ops, &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; mutex_lock(&fullstop_mutex); -- cgit v1.2.3 From e3f8d3788ed7cf55946030dc9b76e73edb111602 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 8 May 2012 10:21:50 -0700 Subject: rcu: Test srcu_barrier() from rcutorture test suite SRCU now has a call_srcu() and an srcu_barrier(), but rcutorture does not test them. This commit adds the machinery to allow rcutorture's existing tests for call_rcu() and rcu_barrier() to apply to the SRCU equivalents. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutorture.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 9850479f319d..7b6935e0cee3 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -635,6 +635,17 @@ static void srcu_torture_synchronize(void) synchronize_srcu(&srcu_ctl); } +static void srcu_torture_call(struct rcu_head *head, + void (*func)(struct rcu_head *head)) +{ + call_srcu(&srcu_ctl, head, func); +} + +static void srcu_torture_barrier(void) +{ + srcu_barrier(&srcu_ctl); +} + static int srcu_torture_stats(char *page) { int cnt = 0; @@ -661,8 +672,8 @@ static struct rcu_torture_ops srcu_ops = { .completed = srcu_torture_completed, .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, - .call = NULL, - .cb_barrier = NULL, + .call = srcu_torture_call, + .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, .name = "srcu" }; -- cgit v1.2.3 From c6ebcbb60c8c68a88160fe54302e851700d1362c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 May 2012 19:21:41 -0700 Subject: rcu: Fix bug in rcu_barrier() torture test The child threads in the rcu_torture_barrier_cbs() are improperly synchronized, which can cause the rcu_barrier() tests to hang. The failure mode is as follows: 1. CPU 0 running in rcu_torture_barrier() sets barrier_cbs_count to n_barrier_cbs. 2. CPU 1 running in rcu_torture_barrier_cbs() wakes up, posts its RCU callback, and atomically decrements barrier_cbs_count. Because barrier_cbs_count is not zero, it does not do the wake_up(). 3. CPU 2 running in rcu_torture_barrier_cbs() wakes up, but finds that barrier_cbs_count is not equal to n_barrier_cbs, and so returns to sleep. 4. The value of barrier_cbs_count therefore never reaches zero, which causes the test to hang. This commit therefore uses a phase variable to coordinate the test, preventing this scenario from occurring. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutorture.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 7b6935e0cee3..f7fe73e59c9f 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -206,6 +206,7 @@ static unsigned long boost_starttime; /* jiffies of next boost test start. */ DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ /* and boost task create/destroy. */ static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ +static bool barrier_phase; /* Test phase. */ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); @@ -1642,6 +1643,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu) static int rcu_torture_barrier_cbs(void *arg) { long myid = (long)arg; + bool lastphase = 0; struct rcu_head rcu; init_rcu_head_on_stack(&rcu); @@ -1649,9 +1651,11 @@ static int rcu_torture_barrier_cbs(void *arg) set_user_nice(current, 19); do { wait_event(barrier_cbs_wq[myid], - atomic_read(&barrier_cbs_count) == n_barrier_cbs || + barrier_phase != lastphase || kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP); + lastphase = barrier_phase; + smp_mb(); /* ensure barrier_phase load before ->call(). */ if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) break; cur_ops->call(&rcu, rcu_torture_barrier_cbf); @@ -1676,7 +1680,8 @@ static int rcu_torture_barrier(void *arg) do { atomic_set(&barrier_cbs_invoked, 0); atomic_set(&barrier_cbs_count, n_barrier_cbs); - /* wake_up() path contains the required barriers. */ + smp_mb(); /* Ensure barrier_phase after prior assignments. */ + barrier_phase = !barrier_phase; for (i = 0; i < n_barrier_cbs; i++) wake_up(&barrier_cbs_wq[i]); wait_event(barrier_wq, -- cgit v1.2.3 From 143aa672f4fc643420c8325ad09c379ed33a27cf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 24 May 2012 18:17:48 -0700 Subject: rcu: Fix diagnostic-printk typo in rcutorture The rcu_torture_barrier() function has a copy-and-paste typo in the string passed to rcutorture_shutdown_absorb(), which this commit fixes. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index f7fe73e59c9f..045a3dc233ea 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -1700,7 +1700,7 @@ static int rcu_torture_barrier(void *arg) schedule_timeout_interruptible(HZ / 10); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); - rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); + rcutorture_shutdown_absorb("rcu_torture_barrier"); while (!kthread_should_stop()) schedule_timeout_interruptible(1); return 0; -- cgit v1.2.3 From 72472a02a9c4507ef54d03d71bb253c26015f52c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 17:50:51 -0700 Subject: rcu: Make rcutorture fakewriters invoke rcu_barrier() The current rcutorture rcu_barrier() testing never intentionally runs more than one instance of rcu_barrier() at a given time. This fails to test the the shiny new concurrency features of rcu_barrier(). This commit therefore modifies the rcutorture fakewriter kthread to randomly invoke rcu_barrier() rather than the usual synchronize_rcu(). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutorture.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 045a3dc233ea..c279ee920947 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -1025,7 +1025,11 @@ rcu_torture_fakewriter(void *arg) do { schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); udelay(rcu_random(&rand) & 0x3ff); - cur_ops->sync(); + if (cur_ops->cb_barrier != NULL && + rcu_random(&rand) % (nfakewriters * 8) == 0) + cur_ops->cb_barrier(); + else + cur_ops->sync(); rcu_stutter_wait("rcu_torture_fakewriter"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); -- cgit v1.2.3 From 285fe29481d865ae381ad3924c80894e6968c2d8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 May 2012 08:45:12 -0700 Subject: rcu: Fix detection of abruptly-ending stall The code that attempts to identify stalls that end just as we detect them is broken by both flavors of initialization failure. This commit therefore properly initializes and computes the count of the number of reasons why the RCU grace period is stalled. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396e..dc8c5284fe06 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -733,7 +733,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) int cpu; long delta; unsigned long flags; - int ndetected; + int ndetected = 0; struct rcu_node *rnp = rcu_get_root(rsp); /* Only let one CPU complain about others per time interval. */ @@ -774,7 +774,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) */ rnp = rcu_get_root(rsp); raw_spin_lock_irqsave(&rnp->lock, flags); - ndetected = rcu_print_task_stall(rnp); + ndetected += rcu_print_task_stall(rnp); raw_spin_unlock_irqrestore(&rnp->lock, flags); print_cpu_stall_info_end(); -- cgit v1.2.3 From 3f5d3ea64f1783f0d4ea0d35890ae3297f045a8b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 May 2012 15:39:56 -0700 Subject: rcu: Consolidate duplicate callback-list initialization There are a couple of open-coded initializations of the rcu_data structure's RCU callback list. This commit therefore consolidates them into a new init_callback_list() function. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index dc8c5284fe06..81e0394e46af 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -936,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) return ret; } +/* + * Initialize the specified rcu_data structure's callback list to empty. + */ +static void init_callback_list(struct rcu_data *rdp) +{ + int i; + + rdp->nxtlist = NULL; + for (i = 0; i < RCU_NEXT_SIZE; i++) + rdp->nxttail[i] = &rdp->nxtlist; +} + /* * Advance this CPU's callbacks, but only if the current grace period * has ended. This may be called only from the CPU to whom the rdp @@ -1328,8 +1340,6 @@ static void rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { - int i; - /* * Orphan the callbacks. First adjust the counts. This is safe * because ->onofflock excludes _rcu_barrier()'s adoption of @@ -1369,9 +1379,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, } /* Finally, initialize the rcu_data structure's list to empty. */ - rdp->nxtlist = NULL; - for (i = 0; i < RCU_NEXT_SIZE; i++) - rdp->nxttail[i] = &rdp->nxtlist; + init_callback_list(rdp); } /* @@ -2407,16 +2415,13 @@ static void __init rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) { unsigned long flags; - int i; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); - rdp->nxtlist = NULL; - for (i = 0; i < RCU_NEXT_SIZE; i++) - rdp->nxttail[i] = &rdp->nxtlist; + init_callback_list(rdp); rdp->qlen_lazy = 0; rdp->qlen = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); -- cgit v1.2.3 From 1d1fb395f6dbc07b36285bbedcf01a73b57f7cb5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 May 2012 15:44:42 -0700 Subject: rcu: Add ACCESS_ONCE() to ->qlen accesses The _rcu_barrier() function accesses other CPUs' rcu_data structure's ->qlen field without benefit of locking. This commit therefore adds the required ACCESS_ONCE() wrappers around accesses and updates that need it. ACCESS_ONCE() is not needed when a CPU accesses its own ->qlen, or in code that cannot run while _rcu_barrier() is sampling ->qlen fields. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 81e0394e46af..89addada3e3a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1350,7 +1350,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, rsp->qlen += rdp->qlen; rdp->n_cbs_orphaned += rdp->qlen; rdp->qlen_lazy = 0; - rdp->qlen = 0; + ACCESS_ONCE(rdp->qlen) = 0; } /* @@ -1600,7 +1600,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) } smp_mb(); /* List handling before counting for rcu_barrier(). */ rdp->qlen_lazy -= count_lazy; - rdp->qlen -= count; + ACCESS_ONCE(rdp->qlen) -= count; rdp->n_cbs_invoked += count; /* Reinstate batch limit if we have worked down the excess. */ @@ -1889,7 +1889,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), rdp = this_cpu_ptr(rsp->rda); /* Add the callback to our list. */ - rdp->qlen++; + ACCESS_ONCE(rdp->qlen)++; if (lazy) rdp->qlen_lazy++; else @@ -2423,7 +2423,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); init_callback_list(rdp); rdp->qlen_lazy = 0; - rdp->qlen = 0; + ACCESS_ONCE(rdp->qlen) = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); -- cgit v1.2.3 From 172708d002e0a2aca032b04fe6f2b8525c29244a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 May 2012 15:23:45 -0700 Subject: rcu: Add a gcc-style structure initializer for RCU pointers RCU_INIT_POINTER() returns a value that is never used, and which should be abolished due to terminal ugliness: q = RCU_INIT_POINTER(global_p, p); However, there are two uses that cannot be handled by a do-while formulation because they do gcc-style initialization: RCU_INIT_POINTER(.real_cred, &init_cred), RCU_INIT_POINTER(.cred, &init_cred), This usage is clever, but not necessarily the nicest approach. This commit therefore creates an RCU_POINTER_INITIALIZER() macro that is specifically designed for gcc-style initialization. Signed-off-by: Paul E. McKenney Acked-by: David Howells --- include/linux/rcupdate.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9cac722b169c..ffe24c09e53d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -905,6 +905,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define RCU_INIT_POINTER(p, v) \ p = (typeof(*v) __force __rcu *)(v) +/** + * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer + * + * GCC-style initialization for an RCU-protected pointer in a structure field. + */ +#define RCU_POINTER_INITIALIZER(p, v) \ + .p = (typeof(*v) __force __rcu *)(v) + static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) { return offset < 4096; -- cgit v1.2.3 From d36cc701b28983ea2c8d80afe68aae5452aea3bf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 May 2012 15:33:15 -0700 Subject: rcu: Use new RCU_POINTER_INITIALIZER for gcc-style initializations This commit applies the INIT_RCU_POINTER() macro to all uses of RCU_POINTER_INITIALIZER() that were all too cleverly creating gcc-style initializations. Signed-off-by: Paul E. McKenney Acked-by: David Howells --- include/linux/init_task.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9e65eff6af3b..8a7476186990 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -168,8 +168,8 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - RCU_INIT_POINTER(.real_cred, &init_cred), \ - RCU_INIT_POINTER(.cred, &init_cred), \ + RCU_POINTER_INITIALIZER(real_cred, &init_cred), \ + RCU_POINTER_INITIALIZER(cred, &init_cred), \ .comm = INIT_TASK_COMM, \ .thread = INIT_THREAD, \ .fs = &init_fs, \ -- cgit v1.2.3 From d1b88eb9e3bccaa43fb5d1bde1cbe210b3434731 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 May 2012 15:42:30 -0700 Subject: rcu: Remove return value from RCU_INIT_POINTER() The return value from RCU_INIT_POINTER() is not used, and using it would be quite ugly, for example: q = RCU_INIT_POINTER(global_p, p); To prevent this sort of ugliness from appearing, this commit wraps RCU_INIT_POINTER() in a do-while loop. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: David Howells --- include/linux/rcupdate.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ffe24c09e53d..abf44d89c6ce 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -903,7 +903,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * the reader-accessible portions of the linked structure. */ #define RCU_INIT_POINTER(p, v) \ - p = (typeof(*v) __force __rcu *)(v) + do { \ + p = (typeof(*v) __force __rcu *)(v); \ + } while (0) /** * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer -- cgit v1.2.3 From e5c1f444d28b1a9eaf9c3927041db0414f684ef4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 May 2012 16:31:38 -0700 Subject: key: Remove extraneous parentheses from rcu_assign_keypointer() This commit removes the extraneous parentheses from rcu_assign_keypointer() so that rcu_assign_pointer() can be wrapped in do-while. It also wraps rcu_assign_keypointer() in a do-while and parenthesizes its final argument, as suggested by David Howells. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/key.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/key.h b/include/linux/key.h index 4cd22ed627ef..cef3b315ba7c 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -303,7 +303,9 @@ static inline bool key_is_instantiated(const struct key *key) rwsem_is_locked(&((struct key *)(KEY))->sem))) #define rcu_assign_keypointer(KEY, PAYLOAD) \ - (rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD)) +do { \ + rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD)); \ +} while (0) #ifdef CONFIG_SYSCTL extern ctl_table key_sysctls[]; -- cgit v1.2.3 From e9023c4061054cbf59c5288068118a4d0b152f01 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 May 2012 15:51:08 -0700 Subject: rcu: Remove return value from rcu_assign_pointer() The return value from rcu_assign_pointer() is not used, and using it would be quite ugly, for example: q = rcu_assign_pointer(global_p, p); To prevent this sort of ugliness from spreading, this commit wraps rcu_assign_pointer() in a do-while loop. Reported-by: Mathieu Desnoyers Reported-by: Josh Triplett Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index abf44d89c6ce..fb8e6db511d7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -513,10 +513,10 @@ static inline void rcu_preempt_sleep_check(void) (_________p1); \ }) #define __rcu_assign_pointer(p, v, space) \ - ({ \ + do { \ smp_wmb(); \ (p) = (typeof(*v) __force space *)(v); \ - }) + } while (0) /** @@ -851,7 +851,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * * Assigns the specified value to the specified RCU-protected * pointer, ensuring that any concurrent RCU readers will see - * any prior initialization. Returns the value assigned. + * any prior initialization. * * Inserts memory barriers on architectures that require them * (which is most of them), and also prevents the compiler from -- cgit v1.2.3 From 2a3fa843b555d202e682bf08c65ee1a4a93c79cf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 21 May 2012 11:58:36 -0700 Subject: rcu: Consolidate tree/tiny __rcu_read_{,un}lock() implementations The CONFIG_TREE_PREEMPT_RCU and CONFIG_TINY_PREEMPT_RCU versions of __rcu_read_lock() and __rcu_read_unlock() are identical, so this commit consolidates them into kernel/rcupdate.h. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 1 + kernel/rcupdate.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ kernel/rcutiny_plugin.h | 47 +---------------------------------------------- kernel/rcutree_plugin.h | 47 +---------------------------------------------- 4 files changed, 47 insertions(+), 92 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index fb8e6db511d7..698555ebf49b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -147,6 +147,7 @@ extern void synchronize_sched(void); extern void __rcu_read_lock(void); extern void __rcu_read_unlock(void); +extern void rcu_read_unlock_special(struct task_struct *t); void synchronize_rcu(void); /* diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 95cba41ce1e9..4e6a61b15e86 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -53,6 +53,50 @@ #ifdef CONFIG_PREEMPT_RCU +/* + * Preemptible RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ + current->rcu_read_lock_nesting++; + barrier(); /* critical section after entry code. */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +/* + * Preemptible RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting != 1) { + --t->rcu_read_lock_nesting; + } else { + barrier(); /* critical section before exit code. */ + t->rcu_read_lock_nesting = INT_MIN; + barrier(); /* assign before ->rcu_read_unlock_special load */ + if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + rcu_read_unlock_special(t); + barrier(); /* ->rcu_read_unlock_special load before assign */ + t->rcu_read_lock_nesting = 0; + } +#ifdef CONFIG_PROVE_LOCKING + { + int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); + + WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); + } +#endif /* #ifdef CONFIG_PROVE_LOCKING */ +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + /* * Check for a task exiting while in a preemptible-RCU read-side * critical section, clean up if so. No need to issue warnings, diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index fc31a2d65100..a269b0da0eb6 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { RCU_TRACE(.rcb.name = "rcu_preempt") }; -static void rcu_read_unlock_special(struct task_struct *t); static int rcu_preempted_readers_exp(void); static void rcu_report_exp_done(void); @@ -526,24 +525,12 @@ void rcu_preempt_note_context_switch(void) local_irq_restore(flags); } -/* - * Tiny-preemptible RCU implementation for rcu_read_lock(). - * Just increment ->rcu_read_lock_nesting, shared state will be updated - * if we block. - */ -void __rcu_read_lock(void) -{ - current->rcu_read_lock_nesting++; - barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ -} -EXPORT_SYMBOL_GPL(__rcu_read_lock); - /* * Handle special cases during rcu_read_unlock(), such as needing to * notify RCU core processing or task having blocked during the RCU * read-side critical section. */ -static noinline void rcu_read_unlock_special(struct task_struct *t) +void rcu_read_unlock_special(struct task_struct *t) { int empty; int empty_exp; @@ -626,38 +613,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) local_irq_restore(flags); } -/* - * Tiny-preemptible RCU implementation for rcu_read_unlock(). - * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost - * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then - * invoke rcu_read_unlock_special() to clean up after a context switch - * in an RCU read-side critical section and other special cases. - */ -void __rcu_read_unlock(void) -{ - struct task_struct *t = current; - - barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ - if (t->rcu_read_lock_nesting != 1) - --t->rcu_read_lock_nesting; - else { - t->rcu_read_lock_nesting = INT_MIN; - barrier(); /* assign before ->rcu_read_unlock_special load */ - if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) - rcu_read_unlock_special(t); - barrier(); /* ->rcu_read_unlock_special load before assign */ - t->rcu_read_lock_nesting = 0; - } -#ifdef CONFIG_PROVE_LOCKING - { - int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); - - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); - } -#endif /* #ifdef CONFIG_PROVE_LOCKING */ -} -EXPORT_SYMBOL_GPL(__rcu_read_unlock); - /* * Check for a quiescent state from the current CPU. When a task blocks, * the task is recorded in the rcu_preempt_ctrlblk structure, which is diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3e4899459f3d..4b6b17cdf66b 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -78,7 +78,6 @@ struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); static struct rcu_state *rcu_state = &rcu_preempt_state; -static void rcu_read_unlock_special(struct task_struct *t); static int rcu_preempted_readers_exp(struct rcu_node *rnp); /* @@ -232,18 +231,6 @@ static void rcu_preempt_note_context_switch(int cpu) local_irq_restore(flags); } -/* - * Tree-preemptible RCU implementation for rcu_read_lock(). - * Just increment ->rcu_read_lock_nesting, shared state will be updated - * if we block. - */ -void __rcu_read_lock(void) -{ - current->rcu_read_lock_nesting++; - barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ -} -EXPORT_SYMBOL_GPL(__rcu_read_lock); - /* * Check for preempted RCU readers blocking the current grace period * for the specified rcu_node structure. If the caller needs a reliable @@ -310,7 +297,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t, * notify RCU core processing or task having blocked during the RCU * read-side critical section. */ -static noinline void rcu_read_unlock_special(struct task_struct *t) +void rcu_read_unlock_special(struct task_struct *t) { int empty; int empty_exp; @@ -418,38 +405,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) } } -/* - * Tree-preemptible RCU implementation for rcu_read_unlock(). - * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost - * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then - * invoke rcu_read_unlock_special() to clean up after a context switch - * in an RCU read-side critical section and other special cases. - */ -void __rcu_read_unlock(void) -{ - struct task_struct *t = current; - - if (t->rcu_read_lock_nesting != 1) - --t->rcu_read_lock_nesting; - else { - barrier(); /* critical section before exit code. */ - t->rcu_read_lock_nesting = INT_MIN; - barrier(); /* assign before ->rcu_read_unlock_special load */ - if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) - rcu_read_unlock_special(t); - barrier(); /* ->rcu_read_unlock_special load before assign */ - t->rcu_read_lock_nesting = 0; - } -#ifdef CONFIG_PROVE_LOCKING - { - int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); - - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); - } -#endif /* #ifdef CONFIG_PROVE_LOCKING */ -} -EXPORT_SYMBOL_GPL(__rcu_read_unlock); - #ifdef CONFIG_RCU_CPU_STALL_VERBOSE /* -- cgit v1.2.3 From 28f5c693d03530eb15c5354f758b789189b98c37 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 25 May 2012 14:25:58 -0700 Subject: rcu: Remove function versions of __kfree_rcu and __is_kfree_rcu_offset Commit d8169d4c (Make __kfree_rcu() less dependent on compiler choices) added cpp macro versions of __kfree_rcu() and __is_kfree_rcu_offset(), but failed to remove the old inline-function versions. This commit does this cleanup. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 698555ebf49b..31568c734525 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -916,24 +916,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define RCU_POINTER_INITIALIZER(p, v) \ .p = (typeof(*v) __force __rcu *)(v) -static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) -{ - return offset < 4096; -} - -static __always_inline -void __kfree_rcu(struct rcu_head *head, unsigned long offset) -{ - typedef void (*rcu_callback)(struct rcu_head *); - - BUILD_BUG_ON(!__builtin_constant_p(offset)); - - /* See the kfree_rcu() header comment. */ - BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); - - kfree_call_rcu(head, (rcu_callback)offset); -} - /* * Does the specified offset indicate that the corresponding rcu_head * structure can be handled by kfree_rcu()? -- cgit v1.2.3 From 62fde6edf12b60fddb13a3f0a779c8be0bb7447e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 May 2012 22:10:24 -0700 Subject: rcu: Make __call_rcu() handle invocation from idle Although __call_rcu() is handled correctly when called from a momentary non-idle period, if it is called on a CPU that RCU believes to be idle on RCU_FAST_NO_HZ kernels, the callback might be indefinitely postponed. This commit therefore ensures that RCU is aware of the new callback and has a chance to force the CPU out of dyntick-idle mode when a new callback is posted. Reported-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 13 ++++--------- kernel/rcutree.c | 15 +++++++++------ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 31568c734525..26f6417f0264 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -256,6 +256,10 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) +extern int rcu_is_cpu_idle(void); +#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */ + #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ @@ -267,15 +271,6 @@ static inline bool rcu_lockdep_current_cpu_online(void) #ifdef CONFIG_DEBUG_LOCK_ALLOC -#ifdef CONFIG_PROVE_RCU -extern int rcu_is_cpu_idle(void); -#else /* !CONFIG_PROVE_RCU */ -static inline int rcu_is_cpu_idle(void) -{ - return 0; -} -#endif /* else !CONFIG_PROVE_RCU */ - static inline void rcu_lock_acquire(struct lockdep_map *map) { lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 89addada3e3a..a4a9c916ad36 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -585,8 +585,6 @@ void rcu_nmi_exit(void) WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); } -#ifdef CONFIG_PROVE_RCU - /** * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle * @@ -604,7 +602,7 @@ int rcu_is_cpu_idle(void) } EXPORT_SYMBOL(rcu_is_cpu_idle); -#ifdef CONFIG_HOTPLUG_CPU +#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) /* * Is the current CPU online? Disable preemption to avoid false positives @@ -645,9 +643,7 @@ bool rcu_lockdep_current_cpu_online(void) } EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - -#endif /* #ifdef CONFIG_PROVE_RCU */ +#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */ /** * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle @@ -1904,6 +1900,13 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), else trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); + /* + * If called from an extended quiescent state, invoke the RCU + * core in order to force a re-evaluation of RCU's idleness. + */ + if (rcu_is_cpu_idle()) + invoke_rcu_core(); + /* If interrupts were disabled, don't dive into RCU core. */ if (irqs_disabled_flags(flags)) { local_irq_restore(flags); -- cgit v1.2.3 From a16b7a693430406dc229ab0c6b154f669a2031c5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 26 May 2012 08:56:01 -0700 Subject: rcu: Prevent __call_rcu() from invoking RCU core on offline CPUs The __call_rcu() function will invoke the RCU core, for example, if it detects that the current CPU has too many callbacks. However, this can happen on an offline CPU that is on its way to the idle loop, in which case it is an error to invoke the RCU core, and the excess callbacks will be adopted in any case. This commit therefore adds checks to __call_rcu() for running on an offline CPU, refraining from invoking the RCU core in this case. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a4a9c916ad36..ceaa95923a87 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1904,11 +1904,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. */ - if (rcu_is_cpu_idle()) + if (rcu_is_cpu_idle() && cpu_online(smp_processor_id())) invoke_rcu_core(); - /* If interrupts were disabled, don't dive into RCU core. */ - if (irqs_disabled_flags(flags)) { + /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ + if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) { local_irq_restore(flags); return; } -- cgit v1.2.3 From 29154c57e35a191c83b19c61b1935c9f21957662 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 30 May 2012 03:21:48 -0700 Subject: rcu: Split RCU core processing out of __call_rcu() The __call_rcu() function is a bit overweight, so this commit splits it into actual enqueuing of and accounting for the callback (__call_rcu()) and associated RCU-core processing (__call_rcu_core()). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 90 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ceaa95923a87..70c4da7d2a97 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1861,45 +1861,12 @@ static void invoke_rcu_core(void) raise_softirq(RCU_SOFTIRQ); } -static void -__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), - struct rcu_state *rsp, bool lazy) +/* + * Handle any core-RCU processing required by a call_rcu() invocation. + */ +static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, + struct rcu_head *head, unsigned long flags) { - unsigned long flags; - struct rcu_data *rdp; - - WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */ - debug_rcu_head_queue(head); - head->func = func; - head->next = NULL; - - smp_mb(); /* Ensure RCU update seen before callback registry. */ - - /* - * Opportunistically note grace-period endings and beginnings. - * Note that we might see a beginning right after we see an - * end, but never vice versa, since this CPU has to pass through - * a quiescent state betweentimes. - */ - local_irq_save(flags); - rdp = this_cpu_ptr(rsp->rda); - - /* Add the callback to our list. */ - ACCESS_ONCE(rdp->qlen)++; - if (lazy) - rdp->qlen_lazy++; - else - rcu_idle_count_callbacks_posted(); - smp_mb(); /* Count before adding callback for rcu_barrier(). */ - *rdp->nxttail[RCU_NEXT_TAIL] = head; - rdp->nxttail[RCU_NEXT_TAIL] = &head->next; - - if (__is_kfree_rcu_offset((unsigned long)func)) - trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, - rdp->qlen_lazy, rdp->qlen); - else - trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); - /* * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. @@ -1908,10 +1875,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), invoke_rcu_core(); /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ - if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) { - local_irq_restore(flags); + if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) return; - } /* * Force the grace period if too many callbacks or too long waiting. @@ -1944,6 +1909,49 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), } } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) force_quiescent_state(rsp, 1); +} + +static void +__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), + struct rcu_state *rsp, bool lazy) +{ + unsigned long flags; + struct rcu_data *rdp; + + WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */ + debug_rcu_head_queue(head); + head->func = func; + head->next = NULL; + + smp_mb(); /* Ensure RCU update seen before callback registry. */ + + /* + * Opportunistically note grace-period endings and beginnings. + * Note that we might see a beginning right after we see an + * end, but never vice versa, since this CPU has to pass through + * a quiescent state betweentimes. + */ + local_irq_save(flags); + rdp = this_cpu_ptr(rsp->rda); + + /* Add the callback to our list. */ + ACCESS_ONCE(rdp->qlen)++; + if (lazy) + rdp->qlen_lazy++; + else + rcu_idle_count_callbacks_posted(); + smp_mb(); /* Count before adding callback for rcu_barrier(). */ + *rdp->nxttail[RCU_NEXT_TAIL] = head; + rdp->nxttail[RCU_NEXT_TAIL] = &head->next; + + if (__is_kfree_rcu_offset((unsigned long)func)) + trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, + rdp->qlen_lazy, rdp->qlen); + else + trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); + + /* Go handle any RCU core processing required. */ + __call_rcu_core(rsp, rdp, head, flags); local_irq_restore(flags); } -- cgit v1.2.3 From d7118175cce7e76b3292b60832813ef1f28b6523 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 6 Jun 2012 07:12:43 -0700 Subject: rcu: Fix rcu_is_cpu_idle() #ifdef in TINY_RCU The rcu_is_cpu_idle() function is used if CONFIG_DEBUG_LOCK_ALLOC, but TINY_RCU defines it only when CONFIG_PROVE_RCU. This causes build failures when CONFIG_DEBUG_LOCK_ALLOC=y but CONFIG_PROVE_RCU=n. This commit therefore adjusts the #ifdefs for rcu_is_cpu_idle() so that it is defined when CONFIG_DEBUG_LOCK_ALLOC=y. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutiny.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 37a5444204d2..547b1fe5b052 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -172,7 +172,7 @@ void rcu_irq_enter(void) local_irq_restore(flags); } -#ifdef CONFIG_PROVE_RCU +#ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Test whether RCU thinks that the current CPU is idle. @@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void) } EXPORT_SYMBOL(rcu_is_cpu_idle); -#endif /* #ifdef CONFIG_PROVE_RCU */ +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /* * Test whether the current CPU was interrupted from idle. Nested -- cgit v1.2.3 From 1c17e4d4437d8045a596d9f06c1558dc09e2b372 Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Tue, 19 Jun 2012 10:43:16 +0200 Subject: rcu: Prevent uninitialized string in RCU CPU stall info An uninitialized string may be displayed at the end of the rcu_preempt detected stall info such as 0: (1 GPs behind) idle=075/140000000000000/0 =8?^D=8?^D ^^^^^^^^^^ if CONFIG_RCU_FAST_NO_HZ is not defined. This trivial patch clears the string in this case. Signed-off-by: Carsten Emde Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 4b6b17cdf66b..395cdd1e0634 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2224,6 +2224,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { + *cp = '\0'; } #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ -- cgit v1.2.3 From 95f0c1de3e6ed4383cc4b5f52ce4ecfb21026b49 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Jun 2012 11:58:27 -0700 Subject: rcu: Disable preemption in rcu_blocking_is_gp() It is time to optimize CONFIG_TREE_PREEMPT_RCU's synchronize_rcu() for uniprocessor optimization, which means that rcu_blocking_is_gp() can no longer rely on RCU read-side critical sections having disabled preemption. This commit therefore disables preemption across rcu_blocking_is_gp()'s scan of the cpu_online_mask. (Updated from previous version to fix embarrassing bug spotted by Wu Fengguang.) Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 70c4da7d2a97..e000a623e635 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1981,28 +1981,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); * occasionally incorrectly indicate that there are multiple CPUs online * when there was in fact only one the whole time, as this just adds * some overhead: RCU still operates correctly. - * - * Of course, sampling num_online_cpus() with preemption enabled can - * give erroneous results if there are concurrent CPU-hotplug operations. - * For example, given a demonic sequence of preemptions in num_online_cpus() - * and CPU-hotplug operations, there could be two or more CPUs online at - * all times, but num_online_cpus() might well return one (or even zero). - * - * However, all such demonic sequences require at least one CPU-offline - * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer - * is only a problem if there is an RCU read-side critical section executing - * throughout. But RCU-sched and RCU-bh read-side critical sections - * disable either preemption or bh, which prevents a CPU from going offline. - * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return - * that there is only one CPU when in fact there was more than one throughout - * is when there were no RCU readers in the system. If there are no - * RCU readers, the grace period by definition can be of zero length, - * regardless of the number of online CPUs. */ static inline int rcu_blocking_is_gp(void) { + int ret; + might_sleep(); /* Check for RCU read-side critical section. */ - return num_online_cpus() <= 1; + preempt_disable(); + ret = num_online_cpus() <= 1; + preempt_enable(); + return ret; } /** -- cgit v1.2.3 From cf01537ecf192d9ff15c32a355db5d5af22bea4d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Jun 2012 11:26:42 -0700 Subject: rcu: Add check for CPUs going offline with callbacks queued If a CPU goes offline with callbacks queued, those callbacks might be indefinitely postponed, which can result in a system hang. This commit therefore inserts warnings for this condition. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e000a623e635..95c7b61e77e4 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1509,6 +1509,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp->lock, flags); if (need_report & RCU_OFL_TASKS_EXP_GP) rcu_report_exp_rnp(rsp, rnp, true); + WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, + "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", + cpu, rdp->qlen, rdp->nxtlist); } #else /* #ifdef CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From bf1304e9cd755be7814ac8365834b7a1d0f06b58 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 May 2012 15:55:39 -0700 Subject: rcu: Dump only the current CPU's buffers for idle-entry/exit warnings Problems in RCU idle entry and exit are almost always confined to the offending CPU. This commit therefore switches ftrace_dump() from DUMP_ALL to DUMP_ORIG. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Heiko Carstens Tested-by: Pascal Chapperon --- kernel/rcutree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396e..6eb48f13eeeb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -358,7 +358,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) struct task_struct *idle = idle_task(smp_processor_id()); trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); - ftrace_dump(DUMP_ALL); + ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ @@ -468,7 +468,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) trace_rcu_dyntick("Error on exit: not idle task", oldval, rdtp->dynticks_nesting); - ftrace_dump(DUMP_ALL); + ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ -- cgit v1.2.3 From 28f8555364ef5b54b21251c5c8022109a70626e9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 May 2012 15:37:48 -0700 Subject: rcu: The rcu_needs_cpu() function is not a quiescent state The TINY_PREEMPT_RCU() function rcu_preempt_needs_cpu(), which is called from rcu_needs_cpu(), assumes that it is in a quiescent state with respect to the CPU. This is no longer the case. This commit therefore updates rcu_preempt_needs_cpu() to make it aware that it is not running in a quiescent state. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Heiko Carstens Tested-by: Pascal Chapperon --- kernel/rcutiny_plugin.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index fc31a2d65100..849ede51c12d 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -846,8 +846,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); */ int rcu_preempt_needs_cpu(void) { - if (!rcu_preempt_running_reader()) - rcu_preempt_cpu_qs(); return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; } -- cgit v1.2.3 From e84c48ae3024ac2f14ed1c3671e5ea37c60fb838 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jun 2012 20:45:10 -0700 Subject: rcu: Round FAST_NO_HZ lazy timeout to nearest second Currently, if several CPUs in the same package have all lazy RCU callbacks, their wakeups will be uncorrelated. If all the CPUs are in the same power domain (as is often the case), this will result in unnecessary power-ups of the package. This commit therefore uses round_jiffies() to round the timeouts to a second boundary, increasing the odds that they can be coalesced with each other or with other timeouts. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3e4899459f3d..c28d25522a81 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1968,7 +1968,7 @@ static void rcu_idle_count_callbacks_posted(void) */ #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ -#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ +#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ /* @@ -2047,10 +2047,13 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) return 1; } /* Set up for the possibility that RCU will post a timer. */ - if (rcu_cpu_has_nonlazy_callbacks(cpu)) - *delta_jiffies = RCU_IDLE_GP_DELAY; - else - *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; + if (rcu_cpu_has_nonlazy_callbacks(cpu)) { + *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies, + RCU_IDLE_GP_DELAY) - jiffies; + } else { + *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY; + *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; + } return 0; } @@ -2187,10 +2190,11 @@ static void rcu_prepare_for_idle(int cpu) if (rcu_cpu_has_nonlazy_callbacks(cpu)) { trace_rcu_prep_idle("Dyntick with callbacks"); rdtp->idle_gp_timer_expires = - jiffies + RCU_IDLE_GP_DELAY; + round_up(jiffies + RCU_IDLE_GP_DELAY, + RCU_IDLE_GP_DELAY); } else { rdtp->idle_gp_timer_expires = - jiffies + RCU_IDLE_LAZY_GP_DELAY; + round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); trace_rcu_prep_idle("Dyntick with lazy callbacks"); } tp = &rdtp->idle_gp_timer; -- cgit v1.2.3 From 4fa3b6cb1bc8c14b81b4c8ffdfd3f2500a7e9367 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 Jun 2012 15:53:53 -0700 Subject: rcu: Fix qlen_lazy breakage Commit d8169d4c (Make __kfree_rcu() less dependent on compiler choices) created a macro out of an inline function in order to avoid build breakage for certain combinations of gcc flags. Unfortunately, it also converted a kfree_call_rcu() to a call_rcu(), which made the rcu_data structure's ->qlen_lazy field lose counts. This commit therefore changes the call_rcu() back to kfree_call_rcu(). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9cac722b169c..46d45e0f9134 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -935,7 +935,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) #define __kfree_rcu(head, offset) \ do { \ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ - call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ + kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ } while (0) /** -- cgit v1.2.3 From 9d2ad24306f2fafc3612e5a216aab31f9e56e879 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 24 Jun 2012 10:15:02 -0700 Subject: rcu: Make RCU_FAST_NO_HZ respect nohz= boot parameter If the nohz= boot parameter disables nohz, then RCU_FAST_NO_HZ needs to also disable itself. This commit therefore checks for tick_nohz_enabled being zero, disabling rcu_prepare_for_idle() if so. This commit assumes that tick_nohz_enabled can change at runtime: If this is not the case, then a simpler approach suffices. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 15 +++++++++++++++ kernel/time/tick-sched.c | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 19b61ac1079f..e978845c6800 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -97,6 +97,7 @@ struct rcu_dynticks { /* # times non-lazy CBs posted to CPU. */ unsigned long nonlazy_posted_snap; /* idle-period nonlazy_posted snapshot. */ + int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c28d25522a81..3508000e077d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1971,6 +1971,8 @@ static void rcu_idle_count_callbacks_posted(void) #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ +extern int tick_nohz_enabled; + /* * Does the specified flavor of RCU have non-lazy callbacks pending on * the specified CPU? Both RCU flavor and CPU are specified by the @@ -2112,6 +2114,7 @@ static void rcu_cleanup_after_idle(int cpu) del_timer(&rdtp->idle_gp_timer); trace_rcu_prep_idle("Cleanup after idle"); + rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); } /* @@ -2137,6 +2140,18 @@ static void rcu_prepare_for_idle(int cpu) { struct timer_list *tp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + int tne; + + /* Handle nohz enablement switches conservatively. */ + tne = ACCESS_ONCE(tick_nohz_enabled); + if (tne != rdtp->tick_nohz_enabled_snap) { + if (rcu_cpu_has_callbacks(cpu)) + invoke_rcu_core(); /* force nohz to see update. */ + rdtp->tick_nohz_enabled_snap = tne; + return; + } + if (!tne) + return; /* * If this is an idle re-entry, for example, due to use of diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 869997833928..66ff07f6184c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void) /* * NO HZ enabled ? */ -static int tick_nohz_enabled __read_mostly = 1; +int tick_nohz_enabled __read_mostly = 1; /* * Enable / Disable tickless mode -- cgit v1.2.3 From ea8e867d9b505b1c852f9932ba75504bc4eebeaa Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Tue, 8 May 2012 18:11:55 +0530 Subject: MIPS: Netlogic: Fix PCIX irq on XLR chips The correct irq is PIC_PCIX_IRQ Signed-off-by: Jayachandran C Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3750/ Signed-off-by: Ralf Baechle --- arch/mips/pci/pci-xlr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c index 1644805a6730..50ff4dc28b4f 100644 --- a/arch/mips/pci/pci-xlr.c +++ b/arch/mips/pci/pci-xlr.c @@ -327,7 +327,7 @@ static int __init pcibios_init(void) } } else { /* XLR PCI controller ACK */ - irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ, xlr_pci_ack); + irq_set_handler_data(PIC_PCIX_IRQ, xlr_pci_ack); } return 0; -- cgit v1.2.3 From 249e2a38fbd28ffaadba112290742ada16946900 Mon Sep 17 00:00:00 2001 From: Ganesan Ramalingam Date: Tue, 8 May 2012 18:11:56 +0530 Subject: MIPS: Netlogic: MSI enable fix for XLS MSI interrupts do not work on XLS after commit a776c49 ( "PCI: msi: Disable msi interrupts when we initialize a pci device" ) because the change disables MSI interrupts on the XLS PCIe bridges during the PCI enumeration. Fix this by enabling MSI interrupts on the bridge in the arch_setup_msi_irq() function. A new function xls_get_pcie_link() has been introduced to get the PCI device corresponding to the top level PCIe bridge on which MSI has to be enabled. Also, update get_irq_vector() to use the new xls_get_pcie_link() function and PCI_SLOT() macro for determining the IRQ of PCI devices. Signed-off-by: Ganesan Ramalingam Signed-off-by: Jayachandran C Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3753/ Signed-off-by: Ralf Baechle --- arch/mips/pci/pci-xlr.c | 59 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 9 deletions(-) diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c index 50ff4dc28b4f..172af1cd5867 100644 --- a/arch/mips/pci/pci-xlr.c +++ b/arch/mips/pci/pci-xlr.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -156,35 +157,55 @@ struct pci_controller nlm_pci_controller = { .io_offset = 0x00000000UL, }; +/* + * The top level PCIe links on the XLS PCIe controller appear as + * bridges. Given a device, this function finds which link it is + * on. + */ +static struct pci_dev *xls_get_pcie_link(const struct pci_dev *dev) +{ + struct pci_bus *bus, *p; + + /* Find the bridge on bus 0 */ + bus = dev->bus; + for (p = bus->parent; p && p->number != 0; p = p->parent) + bus = p; + + return p ? bus->self : NULL; +} + static int get_irq_vector(const struct pci_dev *dev) { + struct pci_dev *lnk; + if (!nlm_chip_is_xls()) - return PIC_PCIX_IRQ; /* for XLR just one IRQ*/ + return PIC_PCIX_IRQ; /* for XLR just one IRQ */ /* * For XLS PCIe, there is an IRQ per Link, find out which * link the device is on to assign interrupts - */ - if (dev->bus->self == NULL) + */ + lnk = xls_get_pcie_link(dev); + if (lnk == NULL) return 0; - switch (dev->bus->self->devfn) { - case 0x0: + switch (PCI_SLOT(lnk->devfn)) { + case 0: return PIC_PCIE_LINK0_IRQ; - case 0x8: + case 1: return PIC_PCIE_LINK1_IRQ; - case 0x10: + case 2: if (nlm_chip_is_xls_b()) return PIC_PCIE_XLSB0_LINK2_IRQ; else return PIC_PCIE_LINK2_IRQ; - case 0x18: + case 3: if (nlm_chip_is_xls_b()) return PIC_PCIE_XLSB0_LINK3_IRQ; else return PIC_PCIE_LINK3_IRQ; } - WARN(1, "Unexpected devfn %d\n", dev->bus->self->devfn); + WARN(1, "Unexpected devfn %d\n", lnk->devfn); return 0; } @@ -202,7 +223,27 @@ void arch_teardown_msi_irq(unsigned int irq) int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { struct msi_msg msg; + struct pci_dev *lnk; int irq, ret; + u16 val; + + /* MSI not supported on XLR */ + if (!nlm_chip_is_xls()) + return 1; + + /* + * Enable MSI on the XLS PCIe controller bridge which was disabled + * at enumeration, the bridge MSI capability is at 0x50 + */ + lnk = xls_get_pcie_link(dev); + if (lnk == NULL) + return 1; + + pci_read_config_word(lnk, 0x50 + PCI_MSI_FLAGS, &val); + if ((val & PCI_MSI_FLAGS_ENABLE) == 0) { + val |= PCI_MSI_FLAGS_ENABLE; + pci_write_config_word(lnk, 0x50 + PCI_MSI_FLAGS, val); + } irq = get_irq_vector(dev); if (irq <= 0) -- cgit v1.2.3 From b876c1a0bcfe68a5eaf03f325cb2a09822bf11f2 Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Tue, 3 Jul 2012 19:04:02 +0200 Subject: MIPS: Netlogic: Fix TLB size of boot CPU. Starting other threads in the core will change the number of TLB entries of a CPU. Re-calculate current_cpu_data.tlbsize on the boot cpu after enabling and waking up other threads. The secondary CPUs do not need this logic because the threads are enabled on the secondary cores at wakeup and before cpu_probe. Signed-off-by: Jayachandran C Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3751/ Signed-off-by: Ralf Baechle --- arch/mips/netlogic/xlp/setup.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c index acb677a1227c..b3df7c2aad1e 100644 --- a/arch/mips/netlogic/xlp/setup.c +++ b/arch/mips/netlogic/xlp/setup.c @@ -82,8 +82,10 @@ void __init prom_free_prom_memory(void) void xlp_mmu_init(void) { + /* enable extended TLB and Large Fixed TLB */ write_c0_config6(read_c0_config6() | 0x24); - current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1; + + /* set page mask of Fixed TLB in config7 */ write_c0_config7(PM_DEFAULT_MASK >> (13 + (ffz(PM_DEFAULT_MASK >> 13) / 2))); } @@ -100,6 +102,10 @@ void __init prom_init(void) nlm_common_ebase = read_c0_ebase() & (~((1 << 12) - 1)); #ifdef CONFIG_SMP nlm_wakeup_secondary_cpus(0xffffffff); + + /* update TLB size after waking up threads */ + current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1; + register_smp_ops(&nlm_smp_ops); #endif } -- cgit v1.2.3 From e84c282b40251f314c429f39b044785e323f2648 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 22 May 2012 14:45:21 +0900 Subject: tools lib traceevent: Let filtering numbers by string use function names As a pointer can be converted into a function name, let the filters work with the function name as well as with the pointer number. If the comparison expects a string, then convert numbers into functions, but only when the number is the same size as a long. Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/n/tip-oxsa1qkr2eq7u8d7r0aapedu@git.kernel.org Signed-off-by: Namhyung Kim --- tools/lib/traceevent/parse-filter.c | 45 ++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index dfcfe2c131de..80d872a81f26 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1710,18 +1710,43 @@ static int test_num(struct event_format *event, static const char *get_field_str(struct filter_arg *arg, struct pevent_record *record) { - const char *val = record->data + arg->str.field->offset; + struct event_format *event; + struct pevent *pevent; + unsigned long long addr; + const char *val = NULL; + char hex[64]; - /* - * We need to copy the data since we can't be sure the field - * is null terminated. - */ - if (*(val + arg->str.field->size - 1)) { - /* copy it */ - memcpy(arg->str.buffer, val, arg->str.field->size); - /* the buffer is already NULL terminated */ - val = arg->str.buffer; + /* If the field is not a string convert it */ + if (arg->str.field->flags & FIELD_IS_STRING) { + val = record->data + arg->str.field->offset; + + /* + * We need to copy the data since we can't be sure the field + * is null terminated. + */ + if (*(val + arg->str.field->size - 1)) { + /* copy it */ + memcpy(arg->str.buffer, val, arg->str.field->size); + /* the buffer is already NULL terminated */ + val = arg->str.buffer; + } + + } else { + event = arg->str.field->event; + pevent = event->pevent; + addr = get_value(event, arg->str.field, record); + + if (arg->str.field->flags & (FIELD_IS_POINTER | FIELD_IS_LONG)) + /* convert to a kernel symbol */ + val = pevent_find_function(pevent, addr); + + if (val == NULL) { + /* just use the hex of the string name */ + snprintf(hex, 64, "0x%llx", addr); + val = hex; + } } + return val; } -- cgit v1.2.3 From c2e6dc2b268cca44d522b2ee86147f0d30d7e3e4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 15 Nov 2011 18:47:48 -0500 Subject: tools lib traceevent: Add support for "%.*s" in bprintk events The arg notation of '*' in bprintks is not handled by the parser. Implement it so that they show up properly in the output and do not kill the tracer from reporting events. Reported-by: Johannes Berg Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/n/tip-t0ctq7t1xz3ud6wv4v886jou@git.kernel.org Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 83f0a8add177..0644c2a23ad6 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3370,7 +3370,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, break; } case PRINT_BSTRING: - trace_seq_printf(s, format, arg->string.string); + print_str_to_seq(s, format, len_arg, arg->string.string); break; case PRINT_OP: /* @@ -3471,6 +3471,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc unsigned long long ip, val; char *ptr; void *bptr; + int vsize; field = pevent->bprint_buf_field; ip_field = pevent->bprint_ip_field; @@ -3519,6 +3520,8 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc goto process_again; case '0' ... '9': goto process_again; + case '.': + goto process_again; case 'p': ls = 1; /* fall through */ @@ -3526,23 +3529,29 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc case 'u': case 'x': case 'i': - /* the pointers are always 4 bytes aligned */ - bptr = (void *)(((unsigned long)bptr + 3) & - ~3); switch (ls) { case 0: - ls = 4; + vsize = 4; break; case 1: - ls = pevent->long_size; + vsize = pevent->long_size; break; case 2: - ls = 8; + vsize = 8; default: + vsize = ls; /* ? */ break; } - val = pevent_read_number(pevent, bptr, ls); - bptr += ls; + /* fall through */ + case '*': + if (*ptr == '*') + vsize = 4; + + /* the pointers are always 4 bytes aligned */ + bptr = (void *)(((unsigned long)bptr + 3) & + ~3); + val = pevent_read_number(pevent, bptr, vsize); + bptr += vsize; arg = alloc_arg(); arg->next = NULL; arg->type = PRINT_ATOM; @@ -3550,6 +3559,13 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc sprintf(arg->atom.atom, "%lld", val); *next = arg; next = &arg->next; + /* + * The '*' case means that an arg is used as the length. + * We need to continue to figure out for what. + */ + if (*ptr == '*') + goto process_again; + break; case 's': arg = alloc_arg(); -- cgit v1.2.3 From 0866a97eb7562409ba792f5e904841dd8e23c8b5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 22 May 2012 14:52:40 +0900 Subject: tools lib traceevent: Add support to show migrate disable counter The RT kernel added a migrate disable counter in all events. Add support to show this in the latency format. Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/n/tip-l6ulxyda952g7kua4pfsh73k@git.kernel.org Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 57 +++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 0644c2a23ad6..872dd35db051 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2884,7 +2884,7 @@ static int get_common_info(struct pevent *pevent, event = pevent->events[0]; field = pevent_find_common_field(event, type); if (!field) - die("field '%s' not found", type); + return -1; *offset = field->offset; *size = field->size; @@ -2935,15 +2935,16 @@ static int parse_common_flags(struct pevent *pevent, void *data) static int parse_common_lock_depth(struct pevent *pevent, void *data) { - int ret; - - ret = __parse_common(pevent, data, - &pevent->ld_size, &pevent->ld_offset, - "common_lock_depth"); - if (ret < 0) - return -1; + return __parse_common(pevent, data, + &pevent->ld_size, &pevent->ld_offset, + "common_lock_depth"); +} - return ret; +static int parse_common_migrate_disable(struct pevent *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->ld_size, &pevent->ld_offset, + "common_migrate_disable"); } static int events_id_cmp(const void *a, const void *b); @@ -3988,10 +3989,13 @@ void pevent_data_lat_fmt(struct pevent *pevent, struct trace_seq *s, struct pevent_record *record) { static int check_lock_depth = 1; + static int check_migrate_disable = 1; static int lock_depth_exists; + static int migrate_disable_exists; unsigned int lat_flags; unsigned int pc; int lock_depth; + int migrate_disable; int hardirq; int softirq; void *data = record->data; @@ -3999,18 +4003,26 @@ void pevent_data_lat_fmt(struct pevent *pevent, lat_flags = parse_common_flags(pevent, data); pc = parse_common_pc(pevent, data); /* lock_depth may not always exist */ - if (check_lock_depth) { - struct format_field *field; - struct event_format *event; - - check_lock_depth = 0; - event = pevent->events[0]; - field = pevent_find_common_field(event, "common_lock_depth"); - if (field) - lock_depth_exists = 1; - } if (lock_depth_exists) lock_depth = parse_common_lock_depth(pevent, data); + else if (check_lock_depth) { + lock_depth = parse_common_lock_depth(pevent, data); + if (lock_depth < 0) + check_lock_depth = 0; + else + lock_depth_exists = 1; + } + + /* migrate_disable may not always exist */ + if (migrate_disable_exists) + migrate_disable = parse_common_migrate_disable(pevent, data); + else if (check_migrate_disable) { + migrate_disable = parse_common_migrate_disable(pevent, data); + if (migrate_disable < 0) + check_migrate_disable = 0; + else + migrate_disable_exists = 1; + } hardirq = lat_flags & TRACE_FLAG_HARDIRQ; softirq = lat_flags & TRACE_FLAG_SOFTIRQ; @@ -4029,6 +4041,13 @@ void pevent_data_lat_fmt(struct pevent *pevent, else trace_seq_putc(s, '.'); + if (migrate_disable_exists) { + if (migrate_disable < 0) + trace_seq_putc(s, '.'); + else + trace_seq_printf(s, "%d", migrate_disable); + } + if (lock_depth_exists) { if (lock_depth < 0) trace_seq_putc(s, '.'); -- cgit v1.2.3 From aaf05c725bed1c7a8c940d9215662c78bea05dfd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 9 Jan 2012 15:58:09 -0500 Subject: tools lib traceevent: Fix %pM print format arg handling When %pM is used, the arg value must be a 6 byte character that will be printed as a 6 byte MAC address. But the code does a break over the main code which updates the current processing arg to point to the next arg. If there are other print arguments after a %pM, they will be off by one. The next arg will still be processing the %pM arg. Reported-by: Johannes Berg Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/n/tip-q3g0n1espikynsdkpbi6ue6t@git.kernel.org Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 872dd35db051..da06c33dcf41 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3858,6 +3858,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event } else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') { print_mac_arg(s, *(ptr+1), data, size, event, arg); ptr++; + arg = arg->next; break; } -- cgit v1.2.3 From c5b35b731965d16fa8c966e288489857097e0b25 Mon Sep 17 00:00:00 2001 From: Wolfgang Mauerer Date: Thu, 22 Mar 2012 11:18:21 +0100 Subject: tools lib traceevent: Fix trace_printk for long integers On 32 bit systems, a conversion of the trace_printk format string "%lu" -> "%llu" is intended (similar for %lx etc.) when a trace was taken on a machine with 64 bit long integers. However, the current code computes the bogus transformation "%lu" -> "%u". Fix this. Besides that, the transformation is only required on systems that don't use 64 bits for long integers natively. Signed-off-by: Wolfgang Mauerer Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/1332411501-8059-3-git-send-email-wolfgang.mauerer@siemens.com Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index da06c33dcf41..ddee5a8cf135 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3895,14 +3895,15 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event break; } } - if (pevent->long_size == 8 && ls) { + if (pevent->long_size == 8 && ls && + sizeof(long) != 8) { char *p; ls = 2; /* make %l into %ll */ p = strchr(format, 'l'); if (p) - memmove(p, p+1, strlen(p)+1); + memmove(p+1, p, strlen(p)+1); else if (strcmp(format, "%p") == 0) strcpy(format, "0x%llx"); } -- cgit v1.2.3 From 0fc45ef5202a34b5862ca246740e6ab50bc3e3e1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 9 Apr 2012 11:54:29 +0900 Subject: tools lib traceevent: Fix printk_cmp() The printk_cmp function should use printk_map instead of func_map. Also rename the variables for consistency. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Link: http://lkml.kernel.org/r/1333940074-19052-3-git-send-email-namhyung.kim@lge.com Signed-off-by: Steven Rostedt Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index ddee5a8cf135..7815b8d2eabd 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -511,12 +511,12 @@ struct printk_list { static int printk_cmp(const void *a, const void *b) { - const struct func_map *fa = a; - const struct func_map *fb = b; + const struct printk_map *pa = a; + const struct printk_map *pb = b; - if (fa->addr < fb->addr) + if (pa->addr < pb->addr) return -1; - if (fa->addr > fb->addr) + if (pa->addr > pb->addr) return 1; return 0; -- cgit v1.2.3 From deba3fb26fd1ed3235b00dccced9784a7f76ec3c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 9 Apr 2012 11:54:30 +0900 Subject: tools lib traceevent: Introduce extend_token() The __read_token() function has some duplicated code to handle internal buffer overflow. Factor them out to new extend_token(). According to the man pages of realloc/free(3), they can handle NULL pointer input so that it can be ended up to compact the code. Also handle error path correctly. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Link: http://lkml.kernel.org/r/1333940074-19052-4-git-send-email-namhyung.kim@lge.com [rostedt@goodmis.org: added some extra whitespace] Signed-off-by: Steven Rostedt Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 50 ++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 7815b8d2eabd..768fab5fbcb7 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -781,6 +781,25 @@ int pevent_peek_char(void) return __peek_char(); } +static int extend_token(char **tok, char *buf, int size) +{ + char *newtok = realloc(*tok, size); + + if (!newtok) { + free(*tok); + *tok = NULL; + return -1; + } + + if (!*tok) + strcpy(newtok, buf); + else + strcat(newtok, buf); + *tok = newtok; + + return 0; +} + static enum event_type force_token(const char *str, char **tok); static enum event_type __read_token(char **tok) @@ -865,17 +884,10 @@ static enum event_type __read_token(char **tok) do { if (i == (BUFSIZ - 1)) { buf[i] = 0; - if (*tok) { - *tok = realloc(*tok, tok_size + BUFSIZ); - if (!*tok) - return EVENT_NONE; - strcat(*tok, buf); - } else - *tok = strdup(buf); + tok_size += BUFSIZ; - if (!*tok) + if (extend_token(tok, buf, tok_size) < 0) return EVENT_NONE; - tok_size += BUFSIZ; i = 0; } last_ch = ch; @@ -914,17 +926,10 @@ static enum event_type __read_token(char **tok) while (get_type(__peek_char()) == type) { if (i == (BUFSIZ - 1)) { buf[i] = 0; - if (*tok) { - *tok = realloc(*tok, tok_size + BUFSIZ); - if (!*tok) - return EVENT_NONE; - strcat(*tok, buf); - } else - *tok = strdup(buf); + tok_size += BUFSIZ; - if (!*tok) + if (extend_token(tok, buf, tok_size) < 0) return EVENT_NONE; - tok_size += BUFSIZ; i = 0; } ch = __read_char(); @@ -933,14 +938,7 @@ static enum event_type __read_token(char **tok) out: buf[i] = 0; - if (*tok) { - *tok = realloc(*tok, tok_size + i); - if (!*tok) - return EVENT_NONE; - strcat(*tok, buf); - } else - *tok = strdup(buf); - if (!*tok) + if (extend_token(tok, buf, tok_size + i + 1) < 0) return EVENT_NONE; if (type == EVENT_ITEM) { -- cgit v1.2.3 From ca63858e9eb0ce495031c4ab5291874835cb43cf Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 9 Apr 2012 11:54:31 +0900 Subject: tools lib traceevent: Handle strdup failure cases There were some places didn't check return value of the strdup and had unneeded/duplicated checks. Fix it. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Link: http://lkml.kernel.org/r/1333940074-19052-5-git-send-email-namhyung.kim@lge.com Signed-off-by: Steven Rostedt Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 768fab5fbcb7..cd334d52d333 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -467,8 +467,10 @@ int pevent_register_function(struct pevent *pevent, char *func, item->mod = NULL; item->addr = addr; - pevent->funclist = item; + if (!item->func || (mod && !item->mod)) + die("malloc func"); + pevent->funclist = item; pevent->func_count++; return 0; @@ -583,10 +585,13 @@ int pevent_register_print_string(struct pevent *pevent, char *fmt, item = malloc_or_die(sizeof(*item)); item->next = pevent->printklist; - pevent->printklist = item; item->printk = strdup(fmt); item->addr = addr; + if (!item->printk) + die("malloc fmt"); + + pevent->printklist = item; pevent->printk_count++; return 0; @@ -2150,6 +2155,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** if (value == NULL) goto out_free; field->value = strdup(value); + if (field->value == NULL) + goto out_free; free_arg(arg); arg = alloc_arg(); @@ -2163,6 +2170,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** if (value == NULL) goto out_free; field->str = strdup(value); + if (field->str == NULL) + goto out_free; free_arg(arg); arg = NULL; @@ -3433,6 +3442,9 @@ process_defined_func(struct trace_seq *s, void *data, int size, string = malloc_or_die(sizeof(*string)); string->next = strings; string->str = strdup(str.buffer); + if (!string->str) + die("malloc str"); + strings = string; trace_seq_destroy(&str); break; @@ -3571,6 +3583,8 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc arg->next = NULL; arg->type = PRINT_BSTRING; arg->string.string = strdup(bptr); + if (!arg->string.string) + break; bptr += strlen(bptr) + 1; *next = arg; next = &arg->next; @@ -4666,6 +4680,8 @@ int pevent_parse_event(struct pevent *pevent, die("failed to read event id"); event->system = strdup(sys); + if (!event->system) + die("failed to allocate system"); /* Add pevent to event so that it can be referenced */ event->pevent = pevent; @@ -4707,6 +4723,10 @@ int pevent_parse_event(struct pevent *pevent, list = &arg->next; arg->type = PRINT_FIELD; arg->field.name = strdup(field->name); + if (!arg->field.name) { + do_warning("failed to allocate field name"); + goto event_failed; + } arg->field.field = field; } return 0; @@ -5050,6 +5070,11 @@ int pevent_register_event_handler(struct pevent *pevent, if (sys_name) handle->sys_name = strdup(sys_name); + if ((event_name && !handle->event_name) || + (sys_name && !handle->sys_name)) { + die("Failed to allocate event/sys name"); + } + handle->func = func; handle->next = pevent->handlers; pevent->handlers = handle; -- cgit v1.2.3 From d286447f23cdb0337a5429e10b39761f6b1d5c18 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 9 Apr 2012 11:54:33 +0900 Subject: tools lib traceevent: Handle realloc() failure path The realloc can fail so that we should handle it properly. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Link: http://lkml.kernel.org/r/1333940074-19052-7-git-send-email-namhyung.kim@lge.com Signed-off-by: Steven Rostedt Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 76 ++++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index cd334d52d333..05eb6b40b16a 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1264,9 +1264,15 @@ static int event_read_fields(struct event_format *event, struct format_field **f field->flags |= FIELD_IS_POINTER; if (field->type) { - field->type = realloc(field->type, - strlen(field->type) + - strlen(last_token) + 2); + char *new_type; + new_type = realloc(field->type, + strlen(field->type) + + strlen(last_token) + 2); + if (!new_type) { + free(last_token); + goto fail; + } + field->type = new_type; strcat(field->type, " "); strcat(field->type, last_token); free(last_token); @@ -1291,6 +1297,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f if (strcmp(token, "[") == 0) { enum event_type last_type = type; char *brackets = token; + char *new_brackets; int len; field->flags |= FIELD_IS_ARRAY; @@ -1310,9 +1317,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f len = 1; last_type = type; - brackets = realloc(brackets, - strlen(brackets) + - strlen(token) + len); + new_brackets = realloc(brackets, + strlen(brackets) + + strlen(token) + len); + if (!new_brackets) { + free(brackets); + goto fail; + } + brackets = new_brackets; if (len == 2) strcat(brackets, " "); strcat(brackets, token); @@ -1328,7 +1340,12 @@ static int event_read_fields(struct event_format *event, struct format_field **f free_token(token); - brackets = realloc(brackets, strlen(brackets) + 2); + new_brackets = realloc(brackets, strlen(brackets) + 2); + if (!new_brackets) { + free(brackets); + goto fail; + } + brackets = new_brackets; strcat(brackets, "]"); /* add brackets to type */ @@ -1339,10 +1356,16 @@ static int event_read_fields(struct event_format *event, struct format_field **f * the format: type [] item; */ if (type == EVENT_ITEM) { - field->type = realloc(field->type, - strlen(field->type) + - strlen(field->name) + - strlen(brackets) + 2); + char *new_type; + new_type = realloc(field->type, + strlen(field->type) + + strlen(field->name) + + strlen(brackets) + 2); + if (!new_type) { + free(brackets); + goto fail; + } + field->type = new_type; strcat(field->type, " "); strcat(field->type, field->name); free_token(field->name); @@ -1350,9 +1373,15 @@ static int event_read_fields(struct event_format *event, struct format_field **f field->name = token; type = read_token(&token); } else { - field->type = realloc(field->type, - strlen(field->type) + - strlen(brackets) + 1); + char *new_type; + new_type = realloc(field->type, + strlen(field->type) + + strlen(brackets) + 1); + if (!new_type) { + free(brackets); + goto fail; + } + field->type = new_type; strcat(field->type, brackets); } free(brackets); @@ -1735,10 +1764,16 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) /* could just be a type pointer */ if ((strcmp(arg->op.op, "*") == 0) && type == EVENT_DELIM && (strcmp(token, ")") == 0)) { + char *new_atom; + if (left->type != PRINT_ATOM) die("bad pointer type"); - left->atom.atom = realloc(left->atom.atom, + new_atom = realloc(left->atom.atom, strlen(left->atom.atom) + 3); + if (!new_atom) + goto out_free; + + left->atom.atom = new_atom; strcat(left->atom.atom, " *"); free(arg->op.op); *arg = *left; @@ -2597,7 +2632,16 @@ process_arg_token(struct event_format *event, struct print_arg *arg, } /* atoms can be more than one token long */ while (type == EVENT_ITEM) { - atom = realloc(atom, strlen(atom) + strlen(token) + 2); + char *new_atom; + new_atom = realloc(atom, + strlen(atom) + strlen(token) + 2); + if (!new_atom) { + free(atom); + *tok = NULL; + free_token(token); + return EVENT_ERROR; + } + atom = new_atom; strcat(atom, " "); strcat(atom, token); free_token(token); -- cgit v1.2.3 From 3831a42deba59bf26618b0dd6fa1320a0a94ebd9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 23 Apr 2012 13:58:33 +0900 Subject: tools lib traceevent: Pass string type argument to args It seems PEVENT_FUNC_ARG_STRING missed passing the allocated string to the args array. Fix it. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Link: http://lkml.kernel.org/r/1335157118-14658-7-git-send-email-namhyung.kim@lge.com Signed-off-by: Steven Rostedt Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 05eb6b40b16a..337ca02fb525 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3489,6 +3489,7 @@ process_defined_func(struct trace_seq *s, void *data, int size, if (!string->str) die("malloc str"); + args[i] = (unsigned long long)string->str; strings = string; trace_seq_destroy(&str); break; -- cgit v1.2.3 From 4b5632bc3122ec9b7e875294c8abe92f1573196a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 23 Apr 2012 13:58:34 +0900 Subject: tools lib traceevent: Do not call add_event() again if allocation failed When memory allocation for the field name is failed, do not goto event_failed since we added the event already. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Link: http://lkml.kernel.org/r/1335157118-14658-8-git-send-email-namhyung.kim@lge.com Signed-off-by: Steven Rostedt Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 337ca02fb525..99b0cd4b79d4 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4770,7 +4770,8 @@ int pevent_parse_event(struct pevent *pevent, arg->field.name = strdup(field->name); if (!arg->field.name) { do_warning("failed to allocate field name"); - goto event_failed; + event->flags |= EVENT_FL_FAILED; + return -1; } arg->field.field = field; } -- cgit v1.2.3 From 16e6b8fdfd181ac79f04ff826c42c7d8a2806a17 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 23 Apr 2012 13:58:35 +0900 Subject: tools lib traceevent: Fix some comments Update and add missing argument descriptions and fix some typo on function comments. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Link: http://lkml.kernel.org/r/1335157118-14658-9-git-send-email-namhyung.kim@lge.com Signed-off-by: Steven Rostedt Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 99b0cd4b79d4..863016040dd6 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4037,8 +4037,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event * pevent_data_lat_fmt - parse the data for the latency format * @pevent: a handle to the pevent * @s: the trace_seq to write to - * @data: the raw data to read from - * @size: currently unused. + * @record: the record to read from * * This parses out the Latency format (interrupts disabled, * need rescheduling, in hard/soft interrupt, preempt count @@ -4173,10 +4172,7 @@ const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid) * pevent_data_comm_from_pid - parse the data into the print format * @s: the trace_seq to write to * @event: the handle to the event - * @cpu: the cpu the event was recorded on - * @data: the raw data - * @size: the size of the raw data - * @nsecs: the timestamp of the event + * @record: the record to read from * * This parses the raw @data using the given @event information and * writes the print format into the trace_seq. @@ -4944,7 +4940,7 @@ int pevent_get_any_field_val(struct trace_seq *s, struct event_format *event, * @record: The record with the field name. * @err: print default error if failed. * - * Returns: 0 on success, -1 field not fould, or 1 if buffer is full. + * Returns: 0 on success, -1 field not found, or 1 if buffer is full. */ int pevent_print_num_field(struct trace_seq *s, const char *fmt, struct event_format *event, const char *name, @@ -4986,11 +4982,12 @@ static void free_func_handle(struct pevent_function_handler *func) * pevent_register_print_function - register a helper function * @pevent: the handle to the pevent * @func: the function to process the helper function + * @ret_type: the return type of the helper function * @name: the name of the helper function * @parameters: A list of enum pevent_func_arg_type * * Some events may have helper functions in the print format arguments. - * This allows a plugin to dynmically create a way to process one + * This allows a plugin to dynamically create a way to process one * of these functions. * * The @parameters is a variable list of pevent_func_arg_type enums that @@ -5061,12 +5058,13 @@ int pevent_register_print_function(struct pevent *pevent, } /** - * pevent_register_event_handle - register a way to parse an event + * pevent_register_event_handler - register a way to parse an event * @pevent: the handle to the pevent * @id: the id of the event to register * @sys_name: the system name the event belongs to * @event_name: the name of the event * @func: the function to call to parse the event information + * @context: the data to be passed to @func * * This function allows a developer to override the parsing of * a given event. If for some reason the default print format -- cgit v1.2.3 From e54b34aed1c4082ed03f4d1f7a19276059b1e30a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 23 Apr 2012 13:58:36 +0900 Subject: tools lib traceevent: Check result of malloc() during reading token The malloc can fail so the return value should be checked. For now, just use malloc_or_die(). Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Link: http://lkml.kernel.org/r/1335157118-14658-10-git-send-email-namhyung.kim@lge.com Signed-off-by: Steven Rostedt Signed-off-by: Namhyung Kim --- tools/lib/traceevent/parse-filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 80d872a81f26..d54c2b4dbd9f 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -96,7 +96,7 @@ static enum event_type read_token(char **tok) (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) && pevent_peek_char() == '~') { /* append it */ - *tok = malloc(3); + *tok = malloc_or_die(3); sprintf(*tok, "%c%c", *token, '~'); free_token(token); /* Now remove the '~' from the buffer */ -- cgit v1.2.3 From 0fed48341529716c38493be66591bda458921b75 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 23 Apr 2012 13:58:38 +0900 Subject: tools lib traceevent: Check return value of arg_to_str() The arg_to_str() can fail so we should handle that case properly. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Link: http://lkml.kernel.org/r/1335157118-14658-12-git-send-email-namhyung.kim@lge.com Signed-off-by: Steven Rostedt Signed-off-by: Namhyung Kim --- tools/lib/traceevent/parse-filter.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index d54c2b4dbd9f..f020ac61f9c1 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -2026,11 +2026,13 @@ static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg) char *lstr; char *rstr; char *op; - char *str; + char *str = NULL; int len; lstr = arg_to_str(filter, arg->exp.left); rstr = arg_to_str(filter, arg->exp.right); + if (!lstr || !rstr) + goto out; switch (arg->exp.type) { case FILTER_EXP_ADD: @@ -2070,6 +2072,7 @@ static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg) len = strlen(op) + strlen(lstr) + strlen(rstr) + 4; str = malloc_or_die(len); snprintf(str, len, "%s %s %s", lstr, op, rstr); +out: free(lstr); free(rstr); @@ -2086,6 +2089,8 @@ static char *num_to_str(struct event_filter *filter, struct filter_arg *arg) lstr = arg_to_str(filter, arg->num.left); rstr = arg_to_str(filter, arg->num.right); + if (!lstr || !rstr) + goto out; switch (arg->num.type) { case FILTER_CMP_EQ: @@ -2122,6 +2127,7 @@ static char *num_to_str(struct event_filter *filter, struct filter_arg *arg) break; } +out: free(lstr); free(rstr); return str; @@ -2272,7 +2278,12 @@ int pevent_filter_compare(struct event_filter *filter1, struct event_filter *fil /* The best way to compare complex filters is with strings */ str1 = arg_to_str(filter1, filter_type1->filter); str2 = arg_to_str(filter2, filter_type2->filter); - result = strcmp(str1, str2) != 0; + if (str1 && str2) + result = strcmp(str1, str2) != 0; + else + /* bail out if allocation fails */ + result = 1; + free(str1); free(str2); if (result) -- cgit v1.2.3 From c9bbabe32a231b5caa8c42fa6783405346983c59 Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Tue, 24 Apr 2012 23:19:40 +0200 Subject: tools lib traceevent: Add missing break in make_bprint_args In the current code we assign vsize=8 and then fall through to the default and assign vsize=1. -> probably the break is missing here, otherwise we can remove the case. Signed-off-by: Peter Huewe Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/n/tip-3fxjy46h2tr9pl0spv7tems6@git.kernel.org Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 863016040dd6..f880db457842 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3594,6 +3594,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc break; case 2: vsize = 8; + break; default: vsize = ls; /* ? */ break; -- cgit v1.2.3 From f6ced60fb6c0ee115982157457c47e48802d6e1d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 24 Apr 2012 10:29:44 +0900 Subject: tools lib traceevent: Cleanup realloc use The if branch is completely unnecessary since 'realloc' can handle NULL pointers for the first parameter. This patch is just an adoption of Ulrich Drepper's recent patch on perf tools. Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1335230984-7613-1-git-send-email-namhyung.kim@lge.com Signed-off-by: Steven Rostedt Signed-off-by: Namhyung Kim --- tools/lib/traceevent/event-parse.c | 8 ++------ tools/lib/traceevent/parse-filter.c | 24 ++++++++---------------- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index f880db457842..5f34aa371b56 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -633,12 +633,8 @@ static void add_event(struct pevent *pevent, struct event_format *event) { int i; - if (!pevent->events) - pevent->events = malloc_or_die(sizeof(event)); - else - pevent->events = - realloc(pevent->events, sizeof(event) * - (pevent->nr_events + 1)); + pevent->events = realloc(pevent->events, sizeof(event) * + (pevent->nr_events + 1)); if (!pevent->events) die("Can not allocate events"); diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index f020ac61f9c1..ad17855528f9 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -148,17 +148,11 @@ add_filter_type(struct event_filter *filter, int id) if (filter_type) return filter_type; - if (!filter->filters) - filter->event_filters = - malloc_or_die(sizeof(*filter->event_filters)); - else { - filter->event_filters = - realloc(filter->event_filters, - sizeof(*filter->event_filters) * - (filter->filters + 1)); - if (!filter->event_filters) - die("Could not allocate filter"); - } + filter->event_filters = realloc(filter->event_filters, + sizeof(*filter->event_filters) * + (filter->filters + 1)); + if (!filter->event_filters) + die("Could not allocate filter"); for (i = 0; i < filter->filters; i++) { if (filter->event_filters[i].event_id > id) @@ -1480,7 +1474,7 @@ void pevent_filter_clear_trivial(struct event_filter *filter, { struct filter_type *filter_type; int count = 0; - int *ids; + int *ids = NULL; int i; if (!filter->filters) @@ -1504,10 +1498,8 @@ void pevent_filter_clear_trivial(struct event_filter *filter, default: break; } - if (count) - ids = realloc(ids, sizeof(*ids) * (count + 1)); - else - ids = malloc(sizeof(*ids)); + + ids = realloc(ids, sizeof(*ids) * (count + 1)); if (!ids) die("Can't allocate ids"); ids[count++] = filter_type->event_id; -- cgit v1.2.3 From 0c47935c5b5cd4916cf1c1ed4a2894807f7bcc3e Mon Sep 17 00:00:00 2001 From: Daniel Nicoletti Date: Wed, 4 Jul 2012 10:20:31 -0300 Subject: HID: add battery quirk for Apple Wireless ANSI Add USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI, to the quirk list since it report wrong feature type and wrong percentage range. Signed-off-by: Daniel Nicoletti Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 132b0019365e..5301006f6c15 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -301,6 +301,9 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), + HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, {} }; -- cgit v1.2.3 From ebf124ffab59e9e1c6179347fe95fe5baf32dcd7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 4 Jul 2012 00:00:47 +0200 Subject: perf test: Use ARRAY_SIZE in parse events tests Use ARRAY_SIZE instead of defining the sizes separately for each test arrays. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1341352848-11833-10-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events-test.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index dd0c306a0698..1b997d2b89ce 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -634,8 +634,6 @@ static struct test__event_st test__events[] = { }, }; -#define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st)) - static struct test__event_st test__events_pmu[] = { [0] = { .name = "cpu/config=10,config1,config2=3,period=1000/u", @@ -647,9 +645,6 @@ static struct test__event_st test__events_pmu[] = { }, }; -#define TEST__EVENTS_PMU_CNT (sizeof(test__events_pmu) / \ - sizeof(struct test__event_st)) - struct test__term { const char *str; __u32 type; @@ -765,21 +760,17 @@ int parse_events__test(void) { int ret; - do { - ret = test_events(test__events, TEST__EVENTS_CNT); - if (ret) - break; - - if (test_pmu()) { - ret = test_events(test__events_pmu, - TEST__EVENTS_PMU_CNT); - if (ret) - break; - } +#define TEST_EVENTS(tests) \ +do { \ + ret = test_events(tests, ARRAY_SIZE(tests)); \ + if (ret) \ + return ret; \ +} while (0) - ret = test_terms(test__terms, TEST__TERMS_CNT); + TEST_EVENTS(test__events); - } while (0); + if (test_pmu()) + TEST_EVENTS(test__events_pmu); - return ret; + return test_terms(test__terms, ARRAY_SIZE(test__terms)); } -- cgit v1.2.3 From 8c5f0a84c6b16e04195001bfd78281909519cf09 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 4 Jul 2012 00:00:39 +0200 Subject: perf tools: Add empty rule for new line in event syntax parsing The flex generator prints out each input character that is ignored by lex rules. Since the alias processing, we can have '\n' characters on input. We need to assign empty rule to it, so it's not printed out. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1341352848-11833-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index a06689474210..6cbe092e6c3b 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -152,6 +152,7 @@ r{num_raw_hex} { return raw(yyscanner); } , { return ','; } : { return ':'; } = { return '='; } +\n { } { {modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); } -- cgit v1.2.3 From cf3506dcc4a855d11af20bcb271ac921033ba0de Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 4 Jul 2012 00:00:43 +0200 Subject: perf tools: Split out PE_VALUE_SYM parsing token to SW and HW tokens Spliting PE_VALUE_SYM token to PE_VALUE_SYM_HW and PE_VALUE_SYM_SW tokens to separate hardware and software symbols. This will be useful in upcomming patch where we want to be able to parse out only hardware events. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1341352848-11833-6-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 2 +- tools/perf/util/parse-events.y | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 6cbe092e6c3b..384ca74c6b22 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -56,7 +56,7 @@ static int sym(yyscan_t scanner, int type, int config) YYSTYPE *yylval = parse_events_get_lval(scanner); yylval->num = (type << 16) + config; - return PE_VALUE_SYM; + return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW; } static int term(yyscan_t scanner, int type) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 9525c455d27f..2bc5fbff2b5d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -26,14 +26,15 @@ do { \ %} %token PE_START_EVENTS PE_START_TERMS -%token PE_VALUE PE_VALUE_SYM PE_RAW PE_TERM +%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM %token PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT %token PE_PREFIX_MEM PE_PREFIX_RAW %token PE_ERROR %type PE_VALUE -%type PE_VALUE_SYM +%type PE_VALUE_SYM_HW +%type PE_VALUE_SYM_SW %type PE_RAW %type PE_TERM %type PE_NAME @@ -41,6 +42,7 @@ do { \ %type PE_NAME_CACHE_OP_RESULT %type PE_MODIFIER_EVENT %type PE_MODIFIER_BP +%type value_sym %type event_config %type event_term %type event_pmu @@ -109,8 +111,13 @@ PE_NAME '/' event_config '/' $$ = list; } +value_sym: +PE_VALUE_SYM_HW +| +PE_VALUE_SYM_SW + event_legacy_symbol: -PE_VALUE_SYM '/' event_config '/' +value_sym '/' event_config '/' { struct parse_events_data__events *data = _data; struct list_head *list = NULL; @@ -123,7 +130,7 @@ PE_VALUE_SYM '/' event_config '/' $$ = list; } | -PE_VALUE_SYM sep_slash_dc +value_sym sep_slash_dc { struct parse_events_data__events *data = _data; struct list_head *list = NULL; -- cgit v1.2.3 From 1dc12760854a670d0366dcfd8ad179e3574c1712 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 4 Jul 2012 00:00:44 +0200 Subject: perf tools: Split event symbols arrays to hw and sw parts It'll be convenient in upcoming patch to access hw event symbols strings via enum perf_hw_id indexes. In order not to duplicate the data, creating two separate arrays: event_symbols_hw for enum perf_hw_id events event_symbols_sw for enum perf_sw_ids events Changing the current event list code to follow the change. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1341352848-11833-7-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 174 +++++++++++++++++++++++++++-------------- 1 file changed, 117 insertions(+), 57 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 1dc44dc69133..1aa721d7c10f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -19,8 +19,6 @@ #define MAX_NAME_LEN 100 struct event_symbol { - u8 type; - u64 config; const char *symbol; const char *alias; }; @@ -30,30 +28,86 @@ extern int parse_events_debug; #endif int parse_events_parse(void *data, void *scanner); -#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x -#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x - -static struct event_symbol event_symbols[] = { - { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, - { CHW(STALLED_CYCLES_FRONTEND), "stalled-cycles-frontend", "idle-cycles-frontend" }, - { CHW(STALLED_CYCLES_BACKEND), "stalled-cycles-backend", "idle-cycles-backend" }, - { CHW(INSTRUCTIONS), "instructions", "" }, - { CHW(CACHE_REFERENCES), "cache-references", "" }, - { CHW(CACHE_MISSES), "cache-misses", "" }, - { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, - { CHW(BRANCH_MISSES), "branch-misses", "" }, - { CHW(BUS_CYCLES), "bus-cycles", "" }, - { CHW(REF_CPU_CYCLES), "ref-cycles", "" }, - - { CSW(CPU_CLOCK), "cpu-clock", "" }, - { CSW(TASK_CLOCK), "task-clock", "" }, - { CSW(PAGE_FAULTS), "page-faults", "faults" }, - { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, - { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, - { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, - { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, - { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, - { CSW(EMULATION_FAULTS), "emulation-faults", "" }, +static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { + .symbol = "cpu-cycles", + .alias = "cycles", + }, + [PERF_COUNT_HW_INSTRUCTIONS] = { + .symbol = "instructions", + .alias = "", + }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { + .symbol = "cache-references", + .alias = "", + }, + [PERF_COUNT_HW_CACHE_MISSES] = { + .symbol = "cache-misses", + .alias = "", + }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { + .symbol = "branch-instructions", + .alias = "branches", + }, + [PERF_COUNT_HW_BRANCH_MISSES] = { + .symbol = "branch-misses", + .alias = "", + }, + [PERF_COUNT_HW_BUS_CYCLES] = { + .symbol = "bus-cycles", + .alias = "", + }, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = { + .symbol = "stalled-cycles-frontend", + .alias = "idle-cycles-frontend", + }, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = { + .symbol = "stalled-cycles-backend", + .alias = "idle-cycles-backend", + }, + [PERF_COUNT_HW_REF_CPU_CYCLES] = { + .symbol = "ref-cycles", + .alias = "", + }, +}; + +static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { + [PERF_COUNT_SW_CPU_CLOCK] = { + .symbol = "cpu-clock", + .alias = "", + }, + [PERF_COUNT_SW_TASK_CLOCK] = { + .symbol = "task-clock", + .alias = "", + }, + [PERF_COUNT_SW_PAGE_FAULTS] = { + .symbol = "page-faults", + .alias = "faults", + }, + [PERF_COUNT_SW_CONTEXT_SWITCHES] = { + .symbol = "context-switches", + .alias = "cs", + }, + [PERF_COUNT_SW_CPU_MIGRATIONS] = { + .symbol = "cpu-migrations", + .alias = "migrations", + }, + [PERF_COUNT_SW_PAGE_FAULTS_MIN] = { + .symbol = "minor-faults", + .alias = "", + }, + [PERF_COUNT_SW_PAGE_FAULTS_MAJ] = { + .symbol = "major-faults", + .alias = "", + }, + [PERF_COUNT_SW_ALIGNMENT_FAULTS] = { + .symbol = "alignment-faults", + .alias = "", + }, + [PERF_COUNT_SW_EMULATION_FAULTS] = { + .symbol = "emulation-faults", + .alias = "", + }, }; #define __PERF_EVENT_FIELD(config, name) \ @@ -824,16 +878,13 @@ int is_valid_tracepoint(const char *event_string) return 0; } -void print_events_type(u8 type) +static void __print_events_type(u8 type, struct event_symbol *syms, + unsigned max) { - struct event_symbol *syms = event_symbols; - unsigned int i; char name[64]; + unsigned i; - for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { - if (type != syms->type) - continue; - + for (i = 0; i < max ; i++, syms++) { if (strlen(syms->alias)) snprintf(name, sizeof(name), "%s OR %s", syms->symbol, syms->alias); @@ -845,6 +896,14 @@ void print_events_type(u8 type) } } +void print_events_type(u8 type) +{ + if (type == PERF_TYPE_SOFTWARE) + __print_events_type(type, event_symbols_sw, PERF_COUNT_SW_MAX); + else + __print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); +} + int print_hwcache_events(const char *event_glob) { unsigned int type, op, i, printed = 0; @@ -872,26 +931,13 @@ int print_hwcache_events(const char *event_glob) return printed; } -/* - * Print the help text for the event symbols: - */ -void print_events(const char *event_glob) +static void print_symbol_events(const char *event_glob, unsigned type, + struct event_symbol *syms, unsigned max) { - unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0; - struct event_symbol *syms = event_symbols; + unsigned i, printed = 0; char name[MAX_NAME_LEN]; - printf("\n"); - printf("List of pre-defined events (to be used in -e):\n"); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { - type = syms->type; - - if (type != prev_type && printed) { - printf("\n"); - printed = 0; - ntypes_printed++; - } + for (i = 0; i < max; i++, syms++) { if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || @@ -902,17 +948,31 @@ void print_events(const char *event_glob) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else strncpy(name, syms->symbol, MAX_NAME_LEN); - printf(" %-50s [%s]\n", name, - event_type_descriptors[type]); - prev_type = type; - ++printed; + printf(" %-50s [%s]\n", name, event_type_descriptors[type]); + + printed++; } - if (ntypes_printed) { - printed = 0; + if (printed) printf("\n"); - } +} + +/* + * Print the help text for the event symbols: + */ +void print_events(const char *event_glob) +{ + + printf("\n"); + printf("List of pre-defined events (to be used in -e):\n"); + + print_symbol_events(event_glob, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX); + + print_symbol_events(event_glob, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX); + print_hwcache_events(event_glob); if (event_glob != NULL) -- cgit v1.2.3 From ce5c1fe9a9e059b5c58f0a7e2a3e687d0efac815 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Jun 2012 11:11:38 +0200 Subject: perf/x86: Fix USER/KERNEL tagging of samples Several perf interrupt handlers (PEBS,IBS,BTS) re-write regs->ip but do not update the segment registers. So use an regs->ip based test instead of an regs->cs/regs->flags based test. Reported-and-tested-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-xxrt0a1zronm1sm36obwc2vy@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c4706cf9c011..6ef9d41b87f9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1863,7 +1863,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs) else misc |= PERF_RECORD_MISC_GUEST_KERNEL; } else { - if (user_mode(regs)) + if (!kernel_ip(regs->ip)) misc |= PERF_RECORD_MISC_USER; else misc |= PERF_RECORD_MISC_KERNEL; -- cgit v1.2.3 From 15c7ad51ad58cbd3b46112c1840bc7228bd354bf Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 20 Jun 2012 20:46:33 +0200 Subject: perf/x86: Rename Intel specific macros There are macros that are Intel specific and not x86 generic. Rename them into INTEL_*. This patch removes X86_PMC_IDX_GENERIC and does: $ sed -i -e 's/X86_PMC_MAX_/INTEL_PMC_MAX_/g' \ arch/x86/include/asm/kvm_host.h \ arch/x86/include/asm/perf_event.h \ arch/x86/kernel/cpu/perf_event.c \ arch/x86/kernel/cpu/perf_event_p4.c \ arch/x86/kvm/pmu.c $ sed -i -e 's/X86_PMC_IDX_FIXED/INTEL_PMC_IDX_FIXED/g' \ arch/x86/include/asm/perf_event.h \ arch/x86/kernel/cpu/perf_event.c \ arch/x86/kernel/cpu/perf_event_intel.c \ arch/x86/kernel/cpu/perf_event_intel_ds.c \ arch/x86/kvm/pmu.c $ sed -i -e 's/X86_PMC_MSK_/INTEL_PMC_MSK_/g' \ arch/x86/include/asm/perf_event.h \ arch/x86/kernel/cpu/perf_event.c Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1340217996-2254-2-git-send-email-robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/include/asm/perf_event.h | 17 +++++++------- arch/x86/kernel/cpu/perf_event.c | 38 +++++++++++++++---------------- arch/x86/kernel/cpu/perf_event_intel.c | 14 ++++++------ arch/x86/kernel/cpu/perf_event_intel_ds.c | 4 ++-- arch/x86/kernel/cpu/perf_event_p4.c | 2 +- arch/x86/kvm/pmu.c | 22 +++++++++--------- 7 files changed, 50 insertions(+), 51 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index db7c1f2709a2..2da88c0cda14 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -313,8 +313,8 @@ struct kvm_pmu { u64 counter_bitmask[2]; u64 global_ctrl_mask; u8 version; - struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC]; - struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED]; + struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; + struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; struct irq_work irq_work; u64 reprogram_pmi; }; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 588f52ea810e..3b31248caf60 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -5,11 +5,10 @@ * Performance event hw details: */ -#define X86_PMC_MAX_GENERIC 32 -#define X86_PMC_MAX_FIXED 3 +#define INTEL_PMC_MAX_GENERIC 32 +#define INTEL_PMC_MAX_FIXED 3 +#define INTEL_PMC_IDX_FIXED 32 -#define X86_PMC_IDX_GENERIC 0 -#define X86_PMC_IDX_FIXED 32 #define X86_PMC_IDX_MAX 64 #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 @@ -121,16 +120,16 @@ struct x86_pmu_capability { /* Instr_Retired.Any: */ #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 -#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) +#define INTEL_PMC_IDX_FIXED_INSTRUCTIONS (INTEL_PMC_IDX_FIXED + 0) /* CPU_CLK_Unhalted.Core: */ #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a -#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) +#define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1) /* CPU_CLK_Unhalted.Ref: */ #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b -#define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2) -#define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES) +#define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2) +#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES) /* * We model BTS tracing as another fixed-mode PMC. @@ -139,7 +138,7 @@ struct x86_pmu_capability { * values are used by actual fixed events and higher values are used * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. */ -#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) +#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16) /* * IBS cpuid feature detection diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e677d9923f4f..668050002602 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -63,7 +63,7 @@ u64 x86_perf_event_update(struct perf_event *event) int idx = hwc->idx; s64 delta; - if (idx == X86_PMC_IDX_FIXED_BTS) + if (idx == INTEL_PMC_IDX_FIXED_BTS) return 0; /* @@ -626,8 +626,8 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) c = sched->constraints[sched->state.event]; /* Prefer fixed purpose counters */ - if (c->idxmsk64 & (~0ULL << X86_PMC_IDX_FIXED)) { - idx = X86_PMC_IDX_FIXED; + if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { + idx = INTEL_PMC_IDX_FIXED; for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; @@ -635,7 +635,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) } /* Grab the first unused counter starting with idx */ idx = sched->state.counter; - for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { + for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } @@ -813,13 +813,13 @@ static inline void x86_assign_hw_event(struct perf_event *event, hwc->last_cpu = smp_processor_id(); hwc->last_tag = ++cpuc->tags[i]; - if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { + if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) { hwc->config_base = 0; hwc->event_base = 0; - } else if (hwc->idx >= X86_PMC_IDX_FIXED) { + } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) { hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED); - hwc->event_base_rdpmc = (hwc->idx - X86_PMC_IDX_FIXED) | 1<<30; + hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED); + hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30; } else { hwc->config_base = x86_pmu_config_addr(hwc->idx); hwc->event_base = x86_pmu_event_addr(hwc->idx); @@ -921,7 +921,7 @@ int x86_perf_event_set_period(struct perf_event *event) s64 period = hwc->sample_period; int ret = 0, idx = hwc->idx; - if (idx == X86_PMC_IDX_FIXED_BTS) + if (idx == INTEL_PMC_IDX_FIXED_BTS) return 0; /* @@ -1338,21 +1338,21 @@ static int __init init_hw_perf_events(void) for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) quirk->func(); - if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { + if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", - x86_pmu.num_counters, X86_PMC_MAX_GENERIC); - x86_pmu.num_counters = X86_PMC_MAX_GENERIC; + x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); + x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; } x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { + if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", - x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); - x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; + x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED); + x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; } x86_pmu.intel_ctrl |= - ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; + ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; perf_events_lapic_init(); register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); @@ -1368,7 +1368,7 @@ static int __init init_hw_perf_events(void) */ for_each_event_constraint(c, x86_pmu.event_constraints) { if (c->cmask != X86_RAW_EVENT_MASK - || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { + || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { continue; } @@ -1611,8 +1611,8 @@ static int x86_pmu_event_idx(struct perf_event *event) if (!x86_pmu.attr_rdpmc) return 0; - if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { - idx -= X86_PMC_IDX_FIXED; + if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { + idx -= INTEL_PMC_IDX_FIXED; idx |= 1 << 30; } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 8408e37f5fa4..5b0b362c7ae6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -747,7 +747,7 @@ static void intel_pmu_disable_all(void) wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); intel_pmu_pebs_disable_all(); @@ -763,9 +763,9 @@ static void intel_pmu_enable_all(int added) wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { struct perf_event *event = - cpuc->events[X86_PMC_IDX_FIXED_BTS]; + cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; if (WARN_ON_ONCE(!event)) return; @@ -871,7 +871,7 @@ static inline void intel_pmu_ack_status(u64 ack) static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) { - int idx = hwc->idx - X86_PMC_IDX_FIXED; + int idx = hwc->idx - INTEL_PMC_IDX_FIXED; u64 ctrl_val, mask; mask = 0xfULL << (idx * 4); @@ -886,7 +886,7 @@ static void intel_pmu_disable_event(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { + if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { intel_pmu_disable_bts(); intel_pmu_drain_bts_buffer(); return; @@ -915,7 +915,7 @@ static void intel_pmu_disable_event(struct perf_event *event) static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) { - int idx = hwc->idx - X86_PMC_IDX_FIXED; + int idx = hwc->idx - INTEL_PMC_IDX_FIXED; u64 ctrl_val, bits, mask; /* @@ -949,7 +949,7 @@ static void intel_pmu_enable_event(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { + if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { if (!__this_cpu_read(cpu_hw_events.enabled)) return; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 026373edef7f..629ae0b7ad90 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -248,7 +248,7 @@ void reserve_ds_buffers(void) */ struct event_constraint bts_constraint = - EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); + EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0); void intel_pmu_enable_bts(u64 config) { @@ -295,7 +295,7 @@ int intel_pmu_drain_bts_buffer(void) u64 to; u64 flags; }; - struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; + struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; struct bts_record *at, *top; struct perf_output_handle handle; struct perf_event_header header; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 6c82e4037989..92c7e39a079f 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -1325,7 +1325,7 @@ __init int p4_pmu_init(void) unsigned int low, high; /* If we get stripped -- indexing fails */ - BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); + BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); rdmsr(MSR_IA32_MISC_ENABLE, low, high); if (!(low & (1 << 7))) { diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 2e88438ffd83..9b7ec1150ab0 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -80,10 +80,10 @@ static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx) static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx) { - if (idx < X86_PMC_IDX_FIXED) + if (idx < INTEL_PMC_IDX_FIXED) return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0); else - return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED); + return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED); } void kvm_deliver_pmi(struct kvm_vcpu *vcpu) @@ -291,7 +291,7 @@ static void reprogram_idx(struct kvm_pmu *pmu, int idx) if (pmc_is_gp(pmc)) reprogram_gp_counter(pmc, pmc->eventsel); else { - int fidx = idx - X86_PMC_IDX_FIXED; + int fidx = idx - INTEL_PMC_IDX_FIXED; reprogram_fixed_counter(pmc, fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx); } @@ -452,7 +452,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) return; pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, - X86_PMC_MAX_GENERIC); + INTEL_PMC_MAX_GENERIC); pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; bitmap_len = (entry->eax >> 24) & 0xff; @@ -462,13 +462,13 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) pmu->nr_arch_fixed_counters = 0; } else { pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), - X86_PMC_MAX_FIXED); + INTEL_PMC_MAX_FIXED); pmu->counter_bitmask[KVM_PMC_FIXED] = ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; } pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | - (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED); + (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); pmu->global_ctrl_mask = ~pmu->global_ctrl; } @@ -478,15 +478,15 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = &vcpu->arch.pmu; memset(pmu, 0, sizeof(*pmu)); - for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { + for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; } - for (i = 0; i < X86_PMC_MAX_FIXED; i++) { + for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { pmu->fixed_counters[i].type = KVM_PMC_FIXED; pmu->fixed_counters[i].vcpu = vcpu; - pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED; + pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; } init_irq_work(&pmu->irq_work, trigger_pmi); kvm_pmu_cpuid_update(vcpu); @@ -498,13 +498,13 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu) int i; irq_work_sync(&pmu->irq_work); - for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { + for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { struct kvm_pmc *pmc = &pmu->gp_counters[i]; stop_counter(pmc); pmc->counter = pmc->eventsel = 0; } - for (i = 0; i < X86_PMC_MAX_FIXED; i++) + for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) stop_counter(&pmu->fixed_counters[i]); pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = -- cgit v1.2.3 From a1eac7ac903ea9afbd4f133659710a0588c8eca5 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 20 Jun 2012 20:46:34 +0200 Subject: perf/x86: Move Intel specific code to intel_pmu_init() There is some Intel specific code in the generic x86 path. Move it to intel_pmu_init(). Since p4 and p6 pmus don't have fixed counters we may skip the check in case such a pmu is detected. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1340217996-2254-3-git-send-email-robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 34 ++-------------------------------- arch/x86/kernel/cpu/perf_event_intel.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 668050002602..7b4f1e871f7c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1307,7 +1307,6 @@ static struct attribute_group x86_pmu_format_group = { static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; - struct event_constraint *c; int err; pr_info("Performance Events: "); @@ -1338,21 +1337,8 @@ static int __init init_hw_perf_events(void) for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) quirk->func(); - if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { - WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", - x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); - x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; - } - x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - - if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { - WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", - x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED); - x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; - } - - x86_pmu.intel_ctrl |= - ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + if (!x86_pmu.intel_ctrl) + x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; perf_events_lapic_init(); register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); @@ -1361,22 +1347,6 @@ static int __init init_hw_perf_events(void) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 0, x86_pmu.num_counters, 0); - if (x86_pmu.event_constraints) { - /* - * event on fixed counter2 (REF_CYCLES) only works on this - * counter, so do not extend mask to generic counters - */ - for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != X86_RAW_EVENT_MASK - || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { - continue; - } - - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; - c->weight += x86_pmu.num_counters; - } - } - x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ x86_pmu_format_group.attrs = x86_pmu.format_attrs; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 5b0b362c7ae6..2e9444c80148 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1765,6 +1765,7 @@ __init int intel_pmu_init(void) union cpuid10_edx edx; union cpuid10_eax eax; union cpuid10_ebx ebx; + struct event_constraint *c; unsigned int unused; int version; @@ -1953,5 +1954,37 @@ __init int intel_pmu_init(void) } } + if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { + WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", + x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); + x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; + } + x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; + + if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { + WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", + x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED); + x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; + } + + x86_pmu.intel_ctrl |= + ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + + if (x86_pmu.event_constraints) { + /* + * event on fixed counter2 (REF_CYCLES) only works on this + * counter, so do not extend mask to generic counters + */ + for_each_event_constraint(c, x86_pmu.event_constraints) { + if (c->cmask != X86_RAW_EVENT_MASK + || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { + continue; + } + + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; + c->weight += x86_pmu.num_counters; + } + } + return 0; } -- cgit v1.2.3 From b1dc3c4820428ac6216537416b2fcd140fdc52e5 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 20 Jun 2012 20:46:35 +0200 Subject: perf/x86/amd: Unify AMD's generic and family 15h pmus There is no need for keeping separate pmu structs. We can enable amd_{get,put}_event_constraints() functions also for family 15h event. The advantage is that there is only a single pmu struct for all AMD cpus. This patch introduces functions to setup the pmu to enabe core performance counters or counter constraints. Also, cpuid checks are used instead of family checks where possible. Thus, it enables the code independently of cpu families if the feature flag is set. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1340217996-2254-4-git-send-email-robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 3 +- arch/x86/kernel/cpu/perf_event_amd.c | 103 ++++++++++++++++------------------- arch/x86/oprofile/op_model_amd.c | 4 +- 3 files changed, 49 insertions(+), 61 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 3b31248caf60..ffdf5e0991c4 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -47,8 +47,7 @@ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) #define AMD64_NUM_COUNTERS 4 -#define AMD64_NUM_COUNTERS_F15H 6 -#define AMD64_NUM_COUNTERS_MAX AMD64_NUM_COUNTERS_F15H +#define AMD64_NUM_COUNTERS_CORE 6 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 11a4eb9131d5..4528ae7b6ec4 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -366,7 +366,7 @@ static void amd_pmu_cpu_starting(int cpu) cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; - if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15) + if (boot_cpu_data.x86_max_cores < 2) return; nb_id = amd_get_nb_id(cpu); @@ -422,35 +422,6 @@ static struct attribute *amd_format_attr[] = { NULL, }; -static __initconst const struct x86_pmu amd_pmu = { - .name = "AMD", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = x86_pmu_enable_all, - .enable = x86_pmu_enable_event, - .disable = x86_pmu_disable_event, - .hw_config = amd_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_K7_EVNTSEL0, - .perfctr = MSR_K7_PERFCTR0, - .event_map = amd_pmu_event_map, - .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = AMD64_NUM_COUNTERS, - .cntval_bits = 48, - .cntval_mask = (1ULL << 48) - 1, - .apic = 1, - /* use highest bit to detect overflow */ - .max_period = (1ULL << 47) - 1, - .get_event_constraints = amd_get_event_constraints, - .put_event_constraints = amd_put_event_constraints, - - .format_attrs = amd_format_attr, - - .cpu_prepare = amd_pmu_cpu_prepare, - .cpu_starting = amd_pmu_cpu_starting, - .cpu_dead = amd_pmu_cpu_dead, -}; - /* AMD Family 15h */ #define AMD_EVENT_TYPE_MASK 0x000000F0ULL @@ -597,8 +568,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev } } -static __initconst const struct x86_pmu amd_pmu_f15h = { - .name = "AMD Family 15h", +static __initconst const struct x86_pmu amd_pmu = { + .name = "AMD", .handle_irq = x86_pmu_handle_irq, .disable_all = x86_pmu_disable_all, .enable_all = x86_pmu_enable_all, @@ -606,50 +577,68 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { .disable = x86_pmu_disable_event, .hw_config = amd_pmu_hw_config, .schedule_events = x86_schedule_events, - .eventsel = MSR_F15H_PERF_CTL, - .perfctr = MSR_F15H_PERF_CTR, + .eventsel = MSR_K7_EVNTSEL0, + .perfctr = MSR_K7_PERFCTR0, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = AMD64_NUM_COUNTERS_F15H, + .num_counters = AMD64_NUM_COUNTERS, .cntval_bits = 48, .cntval_mask = (1ULL << 48) - 1, .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, - .get_event_constraints = amd_get_event_constraints_f15h, - /* nortbridge counters not yet implemented: */ -#if 0 + .get_event_constraints = amd_get_event_constraints, .put_event_constraints = amd_put_event_constraints, + .format_attrs = amd_format_attr, + .cpu_prepare = amd_pmu_cpu_prepare, - .cpu_dead = amd_pmu_cpu_dead, -#endif .cpu_starting = amd_pmu_cpu_starting, - .format_attrs = amd_format_attr, + .cpu_dead = amd_pmu_cpu_dead, }; +static int setup_event_constraints(void) +{ + if (boot_cpu_data.x86 >= 0x15) + x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; + return 0; +} + +static int setup_perfctr_core(void) +{ + if (!cpu_has_perfctr_core) { + WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h, + KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!"); + return -ENODEV; + } + + WARN(x86_pmu.get_event_constraints == amd_get_event_constraints, + KERN_ERR "hw perf events core counters need constraints handler!"); + + /* + * If core performance counter extensions exists, we must use + * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also + * x86_pmu_addr_offset(). + */ + x86_pmu.eventsel = MSR_F15H_PERF_CTL; + x86_pmu.perfctr = MSR_F15H_PERF_CTR; + x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; + + printk(KERN_INFO "perf: AMD core performance counters detected\n"); + + return 0; +} + __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) return -ENODEV; - /* - * If core performance counter extensions exists, it must be - * family 15h, otherwise fail. See x86_pmu_addr_offset(). - */ - switch (boot_cpu_data.x86) { - case 0x15: - if (!cpu_has_perfctr_core) - return -ENODEV; - x86_pmu = amd_pmu_f15h; - break; - default: - if (cpu_has_perfctr_core) - return -ENODEV; - x86_pmu = amd_pmu; - break; - } + x86_pmu = amd_pmu; + + setup_event_constraints(); + setup_perfctr_core(); /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 303f08637826..b2b94438ff05 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -312,7 +312,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) goto fail; } /* both registers must be reserved */ - if (num_counters == AMD64_NUM_COUNTERS_F15H) { + if (num_counters == AMD64_NUM_COUNTERS_CORE) { msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); } else { @@ -514,7 +514,7 @@ static int op_amd_init(struct oprofile_operations *ops) ops->create_files = setup_ibs_files; if (boot_cpu_data.x86 == 0x15) { - num_counters = AMD64_NUM_COUNTERS_F15H; + num_counters = AMD64_NUM_COUNTERS_CORE; } else { num_counters = AMD64_NUM_COUNTERS; } -- cgit v1.2.3 From f285f92f7e4c9af20149130c8fd5027131b39b0e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 20 Jun 2012 20:46:36 +0200 Subject: perf/x86: Improve debug output in check_hw_exists() It might be of interest which perfctr msr failed. Signed-off-by: Robert Richter [ added hunk to avoid GCC warn ] Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1340217996-2254-5-git-send-email-robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 7b4f1e871f7c..3eb88ebcec5a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -178,7 +178,7 @@ static void release_pmc_hardware(void) {} static bool check_hw_exists(void) { - u64 val, val_new = 0; + u64 val, val_new = ~0; int i, reg, ret = 0; /* @@ -211,8 +211,9 @@ static bool check_hw_exists(void) * that don't trap on the MSR access and always return 0s. */ val = 0xabcdUL; - ret = wrmsrl_safe(x86_pmu_event_addr(0), val); - ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); + reg = x86_pmu_event_addr(0); + ret = wrmsrl_safe(reg, val); + ret |= rdmsrl_safe(reg, &val_new); if (ret || val != val_new) goto msr_fail; @@ -229,6 +230,7 @@ bios_fail: msr_fail: printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); + printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new); return false; } -- cgit v1.2.3 From c93dc84cbe32435be3ffa2fbde355eff94955c32 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Jun 2012 14:50:50 +0200 Subject: perf/x86: Add a microcode revision check for SNB-PEBS Recent Intel microcode resolved the SNB-PEBS issues, so conditionally enable PEBS on SNB hardware depending on the microcode revision. Thanks to Stephane for figuring out the various microcode revisions. Suggested-by: Stephane Eranian Acked-by: Borislav Petkov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-v3672ziwh9damwqwh1uz3krm@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 2 ++ arch/x86/kernel/cpu/perf_event.c | 21 +++++++++----- arch/x86/kernel/cpu/perf_event.h | 4 ++- arch/x86/kernel/cpu/perf_event_intel.c | 51 ++++++++++++++++++++++++++++++++-- arch/x86/kernel/microcode_core.c | 10 +++++-- 5 files changed, 74 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ffdf5e0991c4..c78f14a0df00 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -232,6 +232,7 @@ struct perf_guest_switch_msr { extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); +extern void perf_check_microcode(void); #else static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) { @@ -245,6 +246,7 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) } static inline void perf_events_lapic_init(void) { } +static inline void perf_check_microcode(void) { } #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3eb88ebcec5a..29557aa06dda 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -379,7 +379,7 @@ int x86_pmu_hw_config(struct perf_event *event) int precise = 0; /* Support for constant skid */ - if (x86_pmu.pebs_active) { + if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { precise++; /* Support for IP fixup */ @@ -1650,13 +1650,20 @@ static void x86_pmu_flush_branch_stack(void) x86_pmu.flush_branch_stack(); } +void perf_check_microcode(void) +{ + if (x86_pmu.check_microcode) + x86_pmu.check_microcode(); +} +EXPORT_SYMBOL_GPL(perf_check_microcode); + static struct pmu pmu = { .pmu_enable = x86_pmu_enable, .pmu_disable = x86_pmu_disable, - .attr_groups = x86_pmu_attr_groups, + .attr_groups = x86_pmu_attr_groups, - .event_init = x86_pmu_event_init, + .event_init = x86_pmu_event_init, .add = x86_pmu_add, .del = x86_pmu_del, @@ -1664,11 +1671,11 @@ static struct pmu pmu = { .stop = x86_pmu_stop, .read = x86_pmu_read, - .start_txn = x86_pmu_start_txn, - .cancel_txn = x86_pmu_cancel_txn, - .commit_txn = x86_pmu_commit_txn, + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, - .event_idx = x86_pmu_event_idx, + .event_idx = x86_pmu_event_idx, .flush_branch_stack = x86_pmu_flush_branch_stack, }; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 83238f2a12b2..3f5c66904355 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -361,6 +361,8 @@ struct x86_pmu { void (*cpu_starting)(int cpu); void (*cpu_dying)(int cpu); void (*cpu_dead)(int cpu); + + void (*check_microcode)(void); void (*flush_branch_stack)(void); /* @@ -373,7 +375,7 @@ struct x86_pmu { * Intel DebugStore bits */ int bts, pebs; - int bts_active, pebs_active; + int bts_active, pebs_active, pebs_broken; int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2e9444c80148..5fdedb4bc3f1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1712,11 +1712,56 @@ static __init void intel_clovertown_quirk(void) x86_pmu.pebs_constraints = NULL; } +static int intel_snb_pebs_broken(int cpu) +{ + u32 rev = UINT_MAX; /* default to broken for unknown models */ + + switch (cpu_data(cpu).x86_model) { + case 42: /* SNB */ + rev = 0x28; + break; + + case 45: /* SNB-EP */ + switch (cpu_data(cpu).x86_mask) { + case 6: rev = 0x618; break; + case 7: rev = 0x70c; break; + } + } + + return (cpu_data(cpu).microcode < rev); +} + +static void intel_snb_check_microcode(void) +{ + int pebs_broken = 0; + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) { + if ((pebs_broken = intel_snb_pebs_broken(cpu))) + break; + } + put_online_cpus(); + + if (pebs_broken == x86_pmu.pebs_broken) + return; + + /* + * Serialized by the microcode lock.. + */ + if (x86_pmu.pebs_broken) { + pr_info("PEBS enabled due to microcode update\n"); + x86_pmu.pebs_broken = 0; + } else { + pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); + x86_pmu.pebs_broken = 1; + } +} + static __init void intel_sandybridge_quirk(void) { - printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); - x86_pmu.pebs = 0; - x86_pmu.pebs_constraints = NULL; + x86_pmu.check_microcode = intel_snb_check_microcode; + intel_snb_check_microcode(); } static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 947e4c64b1d8..1649cf899ad6 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -87,6 +87,7 @@ #include #include #include +#include MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); @@ -277,7 +278,6 @@ static int reload_for_cpu(int cpu) struct ucode_cpu_info *uci = ucode_cpu_info + cpu; int err = 0; - mutex_lock(µcode_mutex); if (uci->valid) { enum ucode_state ustate; @@ -288,7 +288,6 @@ static int reload_for_cpu(int cpu) if (ustate == UCODE_ERROR) err = -EINVAL; } - mutex_unlock(µcode_mutex); return err; } @@ -309,6 +308,7 @@ static ssize_t reload_store(struct device *dev, return size; get_online_cpus(); + mutex_lock(µcode_mutex); for_each_online_cpu(cpu) { tmp_ret = reload_for_cpu(cpu); if (tmp_ret != 0) @@ -318,6 +318,9 @@ static ssize_t reload_store(struct device *dev, if (!ret) ret = tmp_ret; } + if (!ret) + perf_check_microcode(); + mutex_unlock(µcode_mutex); put_online_cpus(); if (!ret) @@ -557,7 +560,8 @@ static int __init microcode_init(void) mutex_lock(µcode_mutex); error = subsys_interface_register(&mc_cpu_interface); - + if (!error) + perf_check_microcode(); mutex_unlock(µcode_mutex); put_online_cpus(); -- cgit v1.2.3 From 3e0091e2b6f8cd59e567f247e345a3a6ad1f6e7e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Jun 2012 23:38:39 +0200 Subject: perf/x86: Save a few bytes in 'struct x86_pmu' All these are basically boolean flags, use a bitfield to save a few bytes. Suggested-by: Borislav Petkov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-vsevd5g8lhcn129n3s7trl7r@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 3f5c66904355..a15df4be151f 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -374,8 +374,11 @@ struct x86_pmu { /* * Intel DebugStore bits */ - int bts, pebs; - int bts_active, pebs_active, pebs_broken; + int bts :1, + bts_active :1, + pebs :1, + pebs_active :1, + pebs_broken :1; int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; -- cgit v1.2.3 From eca26c9950f4d3e9c92ba275e9f4aee834aa1913 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 27 Jun 2012 15:09:12 +0800 Subject: perf/x86: Use 0xff as pseudo code for fixed uncore event Stephane Eranian suggestted using 0xff as pseudo code for fixed uncore event and using the umask value to determine which of the fixed events we want to map to. So far there is at most one fixed counter in a uncore PMU. So just change the definition of UNCORE_FIXED_EVENT to 0xff. Suggested-by: Stephane Eranian Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1340780953-21130-1-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 4 ++-- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 6f43f9584e3d..c42a3f7b5233 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -179,7 +179,7 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = { }; static struct uncore_event_desc snbep_uncore_imc_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0xff"), + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), { /* end: all zeroes */ }, @@ -616,7 +616,7 @@ static struct attribute_group nhm_uncore_format_group = { }; static struct uncore_event_desc nhm_uncore_events[] = { - INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0xff"), + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"), INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"), INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"), diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 4d52db0d1dfe..88498c7b3420 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -9,7 +9,7 @@ #define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC) -#define UNCORE_FIXED_EVENT 0xffff +#define UNCORE_FIXED_EVENT 0xff #define UNCORE_PMC_IDX_MAX_GENERIC 8 #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) -- cgit v1.2.3 From 3b19e4c98c035c9ab218fc64ef26f4f7a30eafb9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 28 Jun 2012 14:56:36 +0800 Subject: perf/x86: Fix event constraint for SandyBridge-EP C-Box The constraint for C-Box event 0x1f should have overlap flag set. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1340866596-22502-2-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index c42a3f7b5233..7d755d2e1c9c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -239,7 +239,7 @@ static struct event_constraint snbep_uncore_cbox_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), - UNCORE_EVENT_CONSTRAINT(0x1f, 0xe), + EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), UNCORE_EVENT_CONSTRAINT(0x21, 0x3), UNCORE_EVENT_CONSTRAINT(0x23, 0x3), UNCORE_EVENT_CONSTRAINT(0x31, 0x3), -- cgit v1.2.3 From 42089697244ba8e64fa43fb5e6d50d47a8e4cb00 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 4 Jul 2012 14:00:14 +0800 Subject: perf/x86: Detect number of instances of uncore CBox The CBox manages the interface between the core and the LLC, so the instances of uncore CBox is equal to number of cores. Reported-by: Andrew Cooks Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1341381616-12229-4-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 7d755d2e1c9c..4fecbd00ee75 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -1605,8 +1605,9 @@ static void __init uncore_cpu_setup(void *dummy) static int __init uncore_cpu_init(void) { - int ret, cpu; + int ret, cpu, max_cores; + max_cores = boot_cpu_data.x86_max_cores; switch (boot_cpu_data.x86_model) { case 26: /* Nehalem */ case 30: @@ -1615,9 +1616,13 @@ static int __init uncore_cpu_init(void) msr_uncores = nhm_msr_uncores; break; case 42: /* Sandy Bridge */ + if (snb_uncore_cbox.num_boxes > max_cores) + snb_uncore_cbox.num_boxes = max_cores; msr_uncores = snb_msr_uncores; break; case 45: /* Sandy Birdge-EP */ + if (snbep_uncore_cbox.num_boxes > max_cores) + snbep_uncore_cbox.num_boxes = max_cores; msr_uncores = snbep_msr_uncores; break; default: -- cgit v1.2.3 From 6a67943a18c264d5f3df436da38edb3e59adc905 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 4 Jul 2012 14:00:15 +0800 Subject: perf/x86: Uncore filter support for SandyBridge-EP This patch adds C-Box and PCU filter support for SandyBridge-EP uncore. We can filter C-Box events by thread/core ID and filter PCU events by frequency/voltage. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1341381616-12229-5-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 228 +++++++++++++++++++++----- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 24 ++- 2 files changed, 206 insertions(+), 46 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 4fecbd00ee75..19faffc60886 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -14,10 +14,13 @@ static cpumask_t uncore_cpu_mask; /* constraint for the fixed counter */ static struct event_constraint constraint_fixed = EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); +static struct event_constraint constraint_empty = + EVENT_CONSTRAINT(0, 0, 0); DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); @@ -26,8 +29,19 @@ DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31"); /* Sandy Bridge-EP uncore support */ +static struct intel_uncore_type snbep_uncore_cbox; +static struct intel_uncore_type snbep_uncore_pcu; + static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; @@ -120,6 +134,10 @@ static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, reg1->config); wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } @@ -149,6 +167,71 @@ static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); } +static struct event_constraint * +snbep_uncore_get_constraint(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + unsigned long flags; + bool ok = false; + + if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc)) + return NULL; + + er = &box->shared_regs[reg1->idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (!atomic_read(&er->ref) || er->config1 == reg1->config) { + atomic_inc(&er->ref); + er->config1 = reg1->config; + ok = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (ok) { + if (box->phys_id >= 0) + reg1->alloc = 1; + return NULL; + } + return &constraint_empty; +} + +static void snbep_uncore_put_constraint(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + + if (box->phys_id < 0 || !reg1->alloc) + return; + + er = &box->shared_regs[reg1->idx]; + atomic_dec(&er->ref); + reg1->alloc = 0; +} + +static int snbep_uncore_hw_config(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (box->pmu->type == &snbep_uncore_cbox) { + reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + + SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & + SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK; + } else if (box->pmu->type == &snbep_uncore_pcu) { + reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; + reg1->config = event->attr.config1 & + SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK; + } else { + return 0; + } + reg1->idx = 0; + return 0; +} + static struct attribute *snbep_uncore_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -167,6 +250,20 @@ static struct attribute *snbep_uncore_ubox_formats_attr[] = { NULL, }; +static struct attribute *snbep_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid.attr, + &format_attr_filter_nid.attr, + &format_attr_filter_state.attr, + &format_attr_filter_opc.attr, + NULL, +}; + static struct attribute *snbep_uncore_pcu_formats_attr[] = { &format_attr_event.attr, &format_attr_occ_sel.attr, @@ -175,6 +272,10 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = { &format_attr_thresh5.attr, &format_attr_occ_invert.attr, &format_attr_occ_edge.attr, + &format_attr_filter_brand0.attr, + &format_attr_filter_brand1.attr, + &format_attr_filter_brand2.attr, + &format_attr_filter_brand3.attr, NULL, }; @@ -203,6 +304,11 @@ static struct attribute_group snbep_uncore_ubox_format_group = { .attrs = snbep_uncore_ubox_formats_attr, }; +static struct attribute_group snbep_uncore_cbox_format_group = { + .name = "format", + .attrs = snbep_uncore_cbox_formats_attr, +}; + static struct attribute_group snbep_uncore_pcu_format_group = { .name = "format", .attrs = snbep_uncore_pcu_formats_attr, @@ -215,6 +321,9 @@ static struct intel_uncore_ops snbep_uncore_msr_ops = { .disable_event = snbep_uncore_msr_disable_event, .enable_event = snbep_uncore_msr_enable_event, .read_counter = snbep_uncore_msr_read_counter, + .get_constraint = snbep_uncore_get_constraint, + .put_constraint = snbep_uncore_put_constraint, + .hw_config = snbep_uncore_hw_config, }; static struct intel_uncore_ops snbep_uncore_pci_ops = { @@ -307,31 +416,33 @@ static struct intel_uncore_type snbep_uncore_ubox = { }; static struct intel_uncore_type snbep_uncore_cbox = { - .name = "cbox", - .num_counters = 4, - .num_boxes = 8, - .perf_ctr_bits = 44, - .event_ctl = SNBEP_C0_MSR_PMON_CTL0, - .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, - .event_mask = SNBEP_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, - .msr_offset = SNBEP_CBO_MSR_OFFSET, - .constraints = snbep_uncore_cbox_constraints, - .ops = &snbep_uncore_msr_ops, - .format_group = &snbep_uncore_format_group, + .name = "cbox", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 44, + .event_ctl = SNBEP_C0_MSR_PMON_CTL0, + .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, + .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, + .msr_offset = SNBEP_CBO_MSR_OFFSET, + .num_shared_regs = 1, + .constraints = snbep_uncore_cbox_constraints, + .ops = &snbep_uncore_msr_ops, + .format_group = &snbep_uncore_cbox_format_group, }; static struct intel_uncore_type snbep_uncore_pcu = { - .name = "pcu", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, - .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, - .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, - .ops = &snbep_uncore_msr_ops, - .format_group = &snbep_uncore_pcu_format_group, + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, + .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snbep_uncore_msr_ops, + .format_group = &snbep_uncore_pcu_format_group, }; static struct intel_uncore_type *snbep_msr_uncores[] = { @@ -747,15 +858,22 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -struct intel_uncore_box *uncore_alloc_box(int cpu) +struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, + int cpu) { struct intel_uncore_box *box; + int i, size; - box = kmalloc_node(sizeof(*box), GFP_KERNEL | __GFP_ZERO, - cpu_to_node(cpu)); + size = sizeof(*box) + type->num_shared_regs * + sizeof(struct intel_uncore_extra_reg); + + box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); if (!box) return NULL; + for (i = 0; i < type->num_shared_regs; i++) + raw_spin_lock_init(&box->shared_regs[i].lock); + uncore_pmu_init_hrtimer(box); atomic_set(&box->refcnt, 1); box->cpu = -1; @@ -834,11 +952,18 @@ static int uncore_collect_events(struct intel_uncore_box *box, } static struct event_constraint * -uncore_event_constraint(struct intel_uncore_type *type, - struct perf_event *event) +uncore_get_event_constraint(struct intel_uncore_box *box, + struct perf_event *event) { + struct intel_uncore_type *type = box->pmu->type; struct event_constraint *c; + if (type->ops->get_constraint) { + c = type->ops->get_constraint(box, event); + if (c) + return c; + } + if (event->hw.config == ~0ULL) return &constraint_fixed; @@ -852,19 +977,25 @@ uncore_event_constraint(struct intel_uncore_type *type, return &type->unconstrainted; } +static void uncore_put_event_constraint(struct intel_uncore_box *box, + struct perf_event *event) +{ + if (box->pmu->type->ops->put_constraint) + box->pmu->type->ops->put_constraint(box, event); +} + static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) { unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; - int i, ret, wmin, wmax; + int i, wmin, wmax, ret = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { - c = uncore_event_constraint(box->pmu->type, - box->event_list[i]); + c = uncore_get_event_constraint(box, box->event_list[i]); constraints[i] = c; wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); @@ -888,13 +1019,17 @@ static int uncore_assign_events(struct intel_uncore_box *box, break; __set_bit(hwc->idx, used_mask); - assign[i] = hwc->idx; + if (assign) + assign[i] = hwc->idx; } - if (i == n) - return 0; - /* slow path */ - ret = perf_assign_events(constraints, n, wmin, wmax, assign); + if (i != n) + ret = perf_assign_events(constraints, n, wmin, wmax, assign); + + if (!assign || ret) { + for (i = 0; i < n; i++) + uncore_put_event_constraint(box, box->event_list[i]); + } return ret ? -EINVAL : 0; } @@ -1021,6 +1156,8 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags) for (i = 0; i < box->n_events; i++) { if (event == box->event_list[i]) { + uncore_put_event_constraint(box, event); + while (++i < box->n_events) box->event_list[i - 1] = box->event_list[i]; @@ -1048,10 +1185,9 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu, { struct perf_event *leader = event->group_leader; struct intel_uncore_box *fake_box; - int assign[UNCORE_PMC_IDX_MAX]; int ret = -EINVAL, n; - fake_box = uncore_alloc_box(smp_processor_id()); + fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); if (!fake_box) return -ENOMEM; @@ -1073,7 +1209,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu, fake_box->n_events = n; - ret = uncore_assign_events(fake_box, assign, n); + ret = uncore_assign_events(fake_box, NULL, n); out: kfree(fake_box); return ret; @@ -1117,6 +1253,10 @@ int uncore_pmu_event_init(struct perf_event *event) return -EINVAL; event->cpu = box->cpu; + event->hw.idx = -1; + event->hw.last_tag = ~0ULL; + event->hw.extra_reg.idx = EXTRA_REG_NONE; + if (event->attr.config == UNCORE_FIXED_EVENT) { /* no fixed counter */ if (!pmu->type->fixed_ctl) @@ -1130,11 +1270,13 @@ int uncore_pmu_event_init(struct perf_event *event) hwc->config = ~0ULL; } else { hwc->config = event->attr.config & pmu->type->event_mask; + if (pmu->type->ops->hw_config) { + ret = pmu->type->ops->hw_config(box, event); + if (ret) + return ret; + } } - event->hw.idx = -1; - event->hw.last_tag = ~0ULL; - if (event->group_leader != event) ret = uncore_validate_group(pmu, event); else @@ -1276,7 +1418,7 @@ static int __devinit uncore_pci_add(struct intel_uncore_type *type, if (phys_id < 0) return -ENODEV; - box = uncore_alloc_box(0); + box = uncore_alloc_box(type, 0); if (!box) return -ENOMEM; @@ -1458,7 +1600,7 @@ static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id) if (pmu->func_id < 0) pmu->func_id = j; - box = uncore_alloc_box(cpu); + box = uncore_alloc_box(type, cpu); if (!box) return -ENOMEM; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 88498c7b3420..b13e9ea81def 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -97,6 +97,10 @@ SNBEP_PMON_CTL_INVERT | \ SNBEP_U_MSR_PMON_CTL_TRESH_MASK) +#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19) +#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_CBO_PMON_CTL_TID_EN) + /* SNB-EP PCU event control */ #define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000 #define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000 @@ -140,15 +144,17 @@ /* SNB-EP Cbo register */ #define SNBEP_C0_MSR_PMON_CTR0 0xd16 #define SNBEP_C0_MSR_PMON_CTL0 0xd10 -#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 #define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04 +#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK 0xfffffc1f #define SNBEP_CBO_MSR_OFFSET 0x20 /* SNB-EP PCU register */ #define SNBEP_PCU_MSR_PMON_CTR0 0xc36 #define SNBEP_PCU_MSR_PMON_CTL0 0xc30 -#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34 #define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff #define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc #define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd @@ -163,7 +169,6 @@ struct intel_uncore_type { int num_boxes; int perf_ctr_bits; int fixed_ctr_bits; - int single_fixed; unsigned perf_ctr; unsigned event_ctl; unsigned event_mask; @@ -171,6 +176,8 @@ struct intel_uncore_type { unsigned fixed_ctl; unsigned box_ctl; unsigned msr_offset; + unsigned num_shared_regs:8; + unsigned single_fixed:1; struct event_constraint unconstrainted; struct event_constraint *constraints; struct intel_uncore_pmu *pmus; @@ -188,6 +195,10 @@ struct intel_uncore_ops { void (*disable_event)(struct intel_uncore_box *, struct perf_event *); void (*enable_event)(struct intel_uncore_box *, struct perf_event *); u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *); + int (*hw_config)(struct intel_uncore_box *, struct perf_event *); + struct event_constraint *(*get_constraint)(struct intel_uncore_box *, + struct perf_event *); + void (*put_constraint)(struct intel_uncore_box *, struct perf_event *); }; struct intel_uncore_pmu { @@ -200,6 +211,12 @@ struct intel_uncore_pmu { struct list_head box_list; }; +struct intel_uncore_extra_reg { + raw_spinlock_t lock; + u64 config1; + atomic_t ref; +}; + struct intel_uncore_box { int phys_id; int n_active; /* number of active events */ @@ -215,6 +232,7 @@ struct intel_uncore_box { struct intel_uncore_pmu *pmu; struct hrtimer hrtimer; struct list_head list; + struct intel_uncore_extra_reg shared_regs[0]; }; #define UNCORE_BOX_FLAG_INITIATED 0 -- cgit v1.2.3 From 2d3f6ccc4ea5c74d4b4af1b47c56b4cff4bbfcb7 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 4 Jul 2012 20:38:19 +0200 Subject: batman-adv: check incoming packet type for bla If the gateway functionality is used, some broadcast packets (DHCP requests) may be transmitted as unicast packets. As the bridge loop avoidance code now only considers the payload Ethernet destination, it may drop the DHCP request for clients which are claimed by other backbone gateways, because it falsely infers from the broadcast address that the right backbone gateway should havehandled the broadcast. Fix this by checking and delegating the batman-adv packet type used for transmission. Reported-by: Guido Iribarren Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 15 +++++++++++---- net/batman-adv/bridge_loop_avoidance.h | 5 +++-- net/batman-adv/soft-interface.c | 6 +++++- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 8bf97515a77d..c5863f499133 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1351,6 +1351,7 @@ void bla_free(struct bat_priv *bat_priv) * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame + * @is_bcast: the packet came in a broadcast packet type. * * bla_rx avoidance checks if: * * we have to race for a claim @@ -1361,7 +1362,8 @@ void bla_free(struct bat_priv *bat_priv) * process the skb. * */ -int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid) +int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid, + bool is_bcast) { struct ethhdr *ethhdr; struct claim search_claim, *claim = NULL; @@ -1380,7 +1382,7 @@ int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid) if (unlikely(atomic_read(&bat_priv->bla_num_requests))) /* don't allow broadcasts while requests are in flight */ - if (is_multicast_ether_addr(ethhdr->h_dest)) + if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) goto handled; memcpy(search_claim.addr, ethhdr->h_source, ETH_ALEN); @@ -1406,8 +1408,13 @@ int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid) } /* if it is a broadcast ... */ - if (is_multicast_ether_addr(ethhdr->h_dest)) { - /* ... drop it. the responsible gateway is in charge. */ + if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) { + /* ... drop it. the responsible gateway is in charge. + * + * We need to check is_bcast because with the gateway + * feature, broadcasts (like DHCP requests) may be sent + * using a unicast packet type. + */ goto handled; } else { /* seems the client considers us as its best gateway. diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index e39f93acc28f..dc5227b398d4 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -23,7 +23,8 @@ #define _NET_BATMAN_ADV_BLA_H_ #ifdef CONFIG_BATMAN_ADV_BLA -int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid); +int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid, + bool is_bcast); int bla_tx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid); int bla_is_backbone_gw(struct sk_buff *skb, struct orig_node *orig_node, int hdr_size); @@ -41,7 +42,7 @@ void bla_free(struct bat_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_BLA */ static inline int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, - short vid) + short vid, bool is_bcast) { return 0; } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 6e2530b02043..a0ec0e4ada4c 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -256,7 +256,11 @@ void interface_rx(struct net_device *soft_iface, struct bat_priv *bat_priv = netdev_priv(soft_iface); struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; + struct batman_header *batadv_header = (struct batman_header *)skb->data; short vid __maybe_unused = -1; + bool is_bcast; + + is_bcast = (batadv_header->packet_type == BAT_BCAST); /* check if enough space is available for pulling, and pull */ if (!pskb_may_pull(skb, hdr_size)) @@ -302,7 +306,7 @@ void interface_rx(struct net_device *soft_iface, /* Let the bridge loop avoidance check the packet. If will * not handle it, we can safely push it up. */ - if (bla_rx(bat_priv, skb, vid)) + if (bla_rx(bat_priv, skb, vid, is_bcast)) goto out; netif_rx(skb); -- cgit v1.2.3 From 326f418b4c74ab4a6066f38b6144bc6461f9447b Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 28 Jun 2012 15:04:57 -0400 Subject: tracepoint: Use static_key_false(), since static_branch() is deprecated Convert the last user of static_branch() -> static_key_false(). Signed-off-by: Jason Baron Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: a.p.zijlstra@chello.nl Link: http://lkml.kernel.org/r/5fffcd40a6c063769badcdd74a7d90980500dbcb.1340909155.git.jbaron@redhat.com Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index bd96ecd0e05c..802de56c41e8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -153,7 +153,7 @@ static inline void tracepoint_synchronize_unregister(void) } \ static inline void trace_##name##_rcuidle(proto) \ { \ - if (static_branch(&__tracepoint_##name.key)) \ + if (static_key_false(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ -- cgit v1.2.3 From 47fbc518a4b5c9a949f7cab8b14a00d3549bf138 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 28 Jun 2012 15:05:02 -0400 Subject: jump label: Remove static_branch() Remove the obsolete static_branch() interface, since the supported interface is now static_key_false()/true() - which is used by all in-tree code. See commit: c5905afb0e ("static keys: Introduce 'struct static_key', static_key_true()/false() and static_key_slow_[inc|dec]()"). Signed-off-by: Jason Baron Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/199332c47eef8005d5a5bf1018a80d25929a5746.1340909155.git.jbaron@redhat.com Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index c513a40510f5..0976fc46d1e0 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -42,8 +42,7 @@ * allowed. * * Not initializing the key (static data is initialized to 0s anyway) is the - * same as using STATIC_KEY_INIT_FALSE and static_key_false() is - * equivalent with static_branch(). + * same as using STATIC_KEY_INIT_FALSE. * */ @@ -107,12 +106,6 @@ static __always_inline bool static_key_true(struct static_key *key) return !static_key_false(key); } -/* Deprecated. Please use 'static_key_false() instead. */ -static __always_inline bool static_branch(struct static_key *key) -{ - return arch_static_branch(key); -} - extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; @@ -166,14 +159,6 @@ static __always_inline bool static_key_true(struct static_key *key) return false; } -/* Deprecated. Please use 'static_key_false() instead. */ -static __always_inline bool static_branch(struct static_key *key) -{ - if (unlikely(atomic_read(&key->enabled)) > 0) - return true; - return false; -} - static inline void static_key_slow_inc(struct static_key *key) { atomic_inc(&key->enabled); -- cgit v1.2.3 From b39f25a849d7677a7dbf183f2483fd41c201a5ce Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 25 Jun 2012 13:38:27 -0700 Subject: x86/apic: Optimize cpu traversal in __assign_irq_vector() using domain membership Currently __assign_irq_vector() goes through each cpu in the specified mask until it finds a free vector in all the cpu's that are part of the same interrupt domain. We visit all the interrupt domain sibling cpus to reserve the free vector. So, when we fail to find a free vector in an interrupt domain, it is safe to continue our search with a cpu belonging to a new interrupt domain. No need to go through each cpu, if the domain containing that cpu is already visited. Use the irq_cfg's old_domain to track the visited domains and optimize the cpu traversal while finding a free vector in the given cpumask. NOTE: We can also optimize the search by using for_each_cpu() and skip the current cpu, if it is not the first cpu in the mask returned by the vector_allocation_domain(). But re-using the cfg->old_domain to track the visited domains will be slightly faster. Signed-off-by: Suresh Siddha Acked-by: Yinghai Lu Acked-by: Alexander Gordeev Acked-by: Cyrill Gorcunov Link: http://lkml.kernel.org/r/1340656709-11423-2-git-send-email-suresh.b.siddha@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 8 +++----- arch/x86/kernel/apic/apic_noop.c | 3 +-- arch/x86/kernel/apic/io_apic.c | 15 ++++++++------- arch/x86/kernel/apic/x2apic_cluster.c | 3 +-- arch/x86/kernel/vsmp_64.c | 3 +-- 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index eec240e12091..8bebeb8952fb 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -306,7 +306,7 @@ struct apic { unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); unsigned long (*check_apicid_present)(int apicid); - bool (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); @@ -614,7 +614,7 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask, unsigned int *apicid); -static inline bool +static inline void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus @@ -627,14 +627,12 @@ flat_vector_allocation_domain(int cpu, struct cpumask *retmask) */ cpumask_clear(retmask); cpumask_bits(retmask)[0] = APIC_ALL_CPUS; - return false; } -static inline bool +static inline void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { cpumask_copy(retmask, cpumask_of(cpu)); - return true; } static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 65c07fc630a1..08c337bc49ff 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -100,12 +100,11 @@ static unsigned long noop_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static bool noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) { if (cpu != 0) pr_warning("APIC: Vector allocated for non-BSP cpu\n"); cpumask_copy(retmask, cpumask_of(cpu)); - return true; } static u32 noop_apic_read(u32 reg) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a951ef7decb1..8a08f09aa505 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1134,12 +1134,13 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) /* Only try and allocate irqs on cpus that are present */ err = -ENOSPC; - for_each_cpu_and(cpu, mask, cpu_online_mask) { + cpumask_clear(cfg->old_domain); + cpu = cpumask_first_and(mask, cpu_online_mask); + while (cpu < nr_cpu_ids) { int new_cpu; int vector, offset; - bool more_domains; - more_domains = apic->vector_allocation_domain(cpu, tmp_mask); + apic->vector_allocation_domain(cpu, tmp_mask); if (cpumask_subset(tmp_mask, cfg->domain)) { free_cpumask_var(tmp_mask); @@ -1156,10 +1157,10 @@ next: } if (unlikely(current_vector == vector)) { - if (more_domains) - continue; - else - break; + cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); + cpumask_andnot(tmp_mask, mask, cfg->old_domain); + cpu = cpumask_first_and(tmp_mask, cpu_online_mask); + continue; } if (test_bit(vector, used_vectors)) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 943d03fc6fc4..b5d889b5659a 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -212,11 +212,10 @@ static int x2apic_cluster_probe(void) /* * Each x2apic cluster is an allocation domain. */ -static bool cluster_vector_allocation_domain(int cpu, struct cpumask *retmask) +static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask) { cpumask_clear(retmask); cpumask_copy(retmask, per_cpu(cpus_in_cluster, cpu)); - return true; } static struct apic apic_x2apic_cluster = { diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index fa5adb7c228c..3f0285ac00fa 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -208,10 +208,9 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) * In vSMP, all cpus should be capable of handling interrupts, regardless of * the APIC used. */ -static bool fill_vector_allocation_domain(int cpu, struct cpumask *retmask) +static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask) { cpumask_setall(retmask); - return false; } static void vsmp_apic_post_init(void) -- cgit v1.2.3 From 1ac322d0b169c95ce34d55b3ed6d40ce1a5f3a02 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 25 Jun 2012 13:38:28 -0700 Subject: x86/apic/x2apic: Limit the vector reservation to the user specified mask For the x2apic cluster mode, vector for an interrupt is currently reserved on all the cpu's that are part of the x2apic cluster. But the interrupts will be routed only to the cluster (derived from the first cpu in the mask) members specified in the mask. So there is no need to reserve the vector in the unused cluster members. Modify __assign_irq_vector() to reserve the vectors based on the user specified irq destination mask. If the new mask is a proper subset of the currently used mask, cleanup the vector allocation on the unused cpu members. Also, allow the apic driver to tune the vector domain based on the affinity mask (which in most cases is the user-specified mask). Signed-off-by: Suresh Siddha Acked-by: Yinghai Lu Acked-by: Alexander Gordeev Acked-by: Cyrill Gorcunov Link: http://lkml.kernel.org/r/1340656709-11423-3-git-send-email-suresh.b.siddha@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 9 ++++++--- arch/x86/kernel/apic/apic_noop.c | 3 ++- arch/x86/kernel/apic/io_apic.c | 31 +++++++++++++++---------------- arch/x86/kernel/apic/x2apic_cluster.c | 6 +++--- arch/x86/kernel/vsmp_64.c | 3 ++- 5 files changed, 28 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 8bebeb8952fb..88093c1d44fd 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -306,7 +306,8 @@ struct apic { unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); unsigned long (*check_apicid_present)(int apicid); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, + const struct cpumask *mask); void (*init_apic_ldr)(void); void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); @@ -615,7 +616,8 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, unsigned int *apicid); static inline void -flat_vector_allocation_domain(int cpu, struct cpumask *retmask) +flat_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -630,7 +632,8 @@ flat_vector_allocation_domain(int cpu, struct cpumask *retmask) } static inline void -default_vector_allocation_domain(int cpu, struct cpumask *retmask) +default_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) { cpumask_copy(retmask, cpumask_of(cpu)); } diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 08c337bc49ff..e145f28b4099 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -100,7 +100,8 @@ static unsigned long noop_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) { if (cpu != 0) pr_warning("APIC: Vector allocated for non-BSP cpu\n"); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8a08f09aa505..9684f963befe 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1113,7 +1113,6 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) */ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 16; - unsigned int old_vector; int cpu, err; cpumask_var_t tmp_mask; @@ -1123,28 +1122,28 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) return -ENOMEM; - old_vector = cfg->vector; - if (old_vector) { - cpumask_and(tmp_mask, mask, cpu_online_mask); - if (cpumask_subset(tmp_mask, cfg->domain)) { - free_cpumask_var(tmp_mask); - return 0; - } - } - /* Only try and allocate irqs on cpus that are present */ err = -ENOSPC; cpumask_clear(cfg->old_domain); cpu = cpumask_first_and(mask, cpu_online_mask); while (cpu < nr_cpu_ids) { - int new_cpu; - int vector, offset; + int new_cpu, vector, offset; - apic->vector_allocation_domain(cpu, tmp_mask); + apic->vector_allocation_domain(cpu, tmp_mask, mask); if (cpumask_subset(tmp_mask, cfg->domain)) { - free_cpumask_var(tmp_mask); - return 0; + err = 0; + if (cpumask_equal(tmp_mask, cfg->domain)) + break; + /* + * New cpumask using the vector is a proper subset of + * the current in use mask. So cleanup the vector + * allocation for the members that are not used anymore. + */ + cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); + cfg->move_in_progress = 1; + cpumask_and(cfg->domain, cfg->domain, tmp_mask); + break; } vector = current_vector; @@ -1172,7 +1171,7 @@ next: /* Found one! */ current_vector = vector; current_offset = offset; - if (old_vector) { + if (cfg->vector) { cfg->move_in_progress = 1; cpumask_copy(cfg->old_domain, cfg->domain); } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index b5d889b5659a..bde78d0098a4 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -212,10 +212,10 @@ static int x2apic_cluster_probe(void) /* * Each x2apic cluster is an allocation domain. */ -static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask) +static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) { - cpumask_clear(retmask); - cpumask_copy(retmask, per_cpu(cpus_in_cluster, cpu)); + cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); } static struct apic apic_x2apic_cluster = { diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 3f0285ac00fa..992f890283e9 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -208,7 +208,8 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) * In vSMP, all cpus should be capable of handling interrupts, regardless of * the APIC used. */ -static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask) +static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) { cpumask_setall(retmask); } -- cgit v1.2.3 From d872818dbbeed1bccf58c7f8c7db432154c802f9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 25 Jun 2012 13:38:29 -0700 Subject: x86/apic/x2apic: Use multiple cluster members for the irq destination only with the explicit affinity During boot or driver load etc, interrupt destination is setup using default target cpu's. Later the user (irqbalance etc) or the driver (irq_set_affinity/ irq_set_affinity_hint) can request the interrupt to be migrated to some specific set of cpu's. In the x2apic cluster routing, for the default scenario use single cpu as the interrupt destination and when there is an explicit interrupt affinity request, route the interrupt to multiple members of a x2apic cluster specified in the cpumask of the migration request. This will minmize the vector pressure when there are lot of interrupt sources and relatively few x2apic clusters (for example a single socket server). This will allow the performance critical interrupts to be routed to multiple cpu's in the x2apic cluster (irqbalance for example uses the cache siblings etc while specifying the interrupt destination) and allow non-critical interrupts to be serviced by a single logical cpu. Signed-off-by: Suresh Siddha Acked-by: Yinghai Lu Acked-by: Alexander Gordeev Acked-by: Cyrill Gorcunov Link: http://lkml.kernel.org/r/1340656709-11423-4-git-send-email-suresh.b.siddha@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_cluster.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index bde78d0098a4..c88baa4ff0e5 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -209,13 +209,30 @@ static int x2apic_cluster_probe(void) return 0; } +static const struct cpumask *x2apic_cluster_target_cpus(void) +{ + return cpu_all_mask; +} + /* * Each x2apic cluster is an allocation domain. */ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, const struct cpumask *mask) { - cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); + /* + * To minimize vector pressure, default case of boot, device bringup + * etc will use a single cpu for the interrupt destination. + * + * On explicit migration requests coming from irqbalance etc, + * interrupts will be routed to the x2apic cluster (cluster-id + * derived from the first cpu in the mask) members specified + * in the mask. + */ + if (mask == x2apic_cluster_target_cpus()) + cpumask_copy(retmask, cpumask_of(cpu)); + else + cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); } static struct apic apic_x2apic_cluster = { @@ -229,7 +246,7 @@ static struct apic apic_x2apic_cluster = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = online_target_cpus, + .target_cpus = x2apic_cluster_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, -- cgit v1.2.3 From 6a6dccba2fdc2a69f1f36b8f1c0acc8598e7221b Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 5 Jul 2012 15:52:23 +0530 Subject: mm: cma: don't replace lowmem pages with highmem The filesystem layer expects pages in the block device's mapping to not be in highmem (the mapping's gfp mask is set in bdget()), but CMA can currently replace lowmem pages with highmem pages, leading to crashes in filesystem code such as the one below: Unable to handle kernel NULL pointer dereference at virtual address 00000400 pgd = c0c98000 [00000400] *pgd=00c91831, *pte=00000000, *ppte=00000000 Internal error: Oops: 817 [#1] PREEMPT SMP ARM CPU: 0 Not tainted (3.5.0-rc5+ #80) PC is at __memzero+0x24/0x80 ... Process fsstress (pid: 323, stack limit = 0xc0cbc2f0) Backtrace: [] (ext4_getblk+0x0/0x180) from [] (ext4_bread+0x1c/0x98) [] (ext4_bread+0x0/0x98) from [] (ext4_mkdir+0x160/0x3bc) r4:c15337f0 [] (ext4_mkdir+0x0/0x3bc) from [] (vfs_mkdir+0x8c/0x98) [] (vfs_mkdir+0x0/0x98) from [] (sys_mkdirat+0x74/0xac) r6:00000000 r5:c152eb40 r4:000001ff r3:c14b43f0 [] (sys_mkdirat+0x0/0xac) from [] (sys_mkdir+0x20/0x24) r6:beccdcf0 r5:00074000 r4:beccdbbc [] (sys_mkdir+0x0/0x24) from [] (ret_fast_syscall+0x0/0x30) Fix this by replacing only highmem pages with highmem. Reported-by: Laura Abbott Signed-off-by: Rabin Vincent Acked-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski --- mm/page_alloc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 44030096da63..4a4f9219683f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5635,7 +5635,12 @@ static struct page * __alloc_contig_migrate_alloc(struct page *page, unsigned long private, int **resultp) { - return alloc_page(GFP_HIGHUSER_MOVABLE); + gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; + + if (PageHighMem(page)) + gfp_mask |= __GFP_HIGHMEM; + + return alloc_page(gfp_mask); } /* [start, end) must belong to a single zone. */ -- cgit v1.2.3 From cc2caea5b6152b8ce66dc2bbe83dc72b60612da8 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 6 Jul 2012 12:02:04 +0200 Subject: mm: cma: fix condition check when setting global cma area dev_set_cma_area incorrectly assigned cma to global area on first call due to incorrect check. This patch fixes this issue. Signed-off-by: Marek Szyprowski --- include/asm-generic/dma-contiguous.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h index c544356b374b..294b1e755ab2 100644 --- a/include/asm-generic/dma-contiguous.h +++ b/include/asm-generic/dma-contiguous.h @@ -18,7 +18,7 @@ static inline void dev_set_cma_area(struct device *dev, struct cma *cma) { if (dev) dev->cma_area = cma; - if (!dev || !dma_contiguous_default_area) + if (!dev && !dma_contiguous_default_area) dma_contiguous_default_area = cma; } -- cgit v1.2.3 From c3b7cdf180090d2686239a75bb0ae408108ed749 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 6 Jul 2012 12:59:46 +0300 Subject: perf/x86: Fix intel_perfmon_event_mapformatting Use tabs for "intel_perfmon_event_map" formatting in perf_event_intel.c. Signed-off-by: Pekka Enberg Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1341568786-7045-1-git-send-email-penberg@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 5fdedb4bc3f1..1f4c8add6759 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -21,14 +21,14 @@ */ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, - [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ + [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, + [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ }; static struct event_constraint intel_core_event_constraints[] __read_mostly = -- cgit v1.2.3 From cfca927972e31a5b3da49bf641c525732ff3c357 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 25 Jun 2012 12:54:17 -0700 Subject: rcu: Introduce check for callback list/count mismatch The recent bug that introduced the RCU callback list/count mismatch showed the need for a diagnostic to check for this, which this commit adds. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 95c7b61e77e4..4154c9567a6d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1612,6 +1612,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) rdp->n_force_qs_snap = rsp->n_force_qs; } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = rdp->qlen; + WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0)); local_irq_restore(flags); -- cgit v1.2.3 From c701d5d9b384ff03ceb232ef21236364d784a411 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Jun 2012 08:08:25 -0700 Subject: rcu: Fix code-style issues involving "else" The Linux kernel coding style says that single-statement blocks should omit curly braces unless the other leg of the "if" statement has multiple statements, in which case the curly braces should be included. This commit fixes RCU's violations of this rule. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutiny_plugin.h | 7 ++++--- kernel/rcutorture.c | 3 ++- kernel/rcutree.c | 7 ++++--- kernel/rcutree_plugin.h | 14 ++++++++------ 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 116725b5edfb..918fd1e8509c 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -350,8 +350,9 @@ static int rcu_initiate_boost(void) rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; invoke_rcu_callbacks(); - } else + } else { RCU_TRACE(rcu_initiate_boost_trace()); + } return 1; } @@ -778,9 +779,9 @@ void synchronize_rcu_expedited(void) rpcp->exp_tasks = NULL; /* Wait for tail of ->blkd_tasks list to drain. */ - if (!rcu_preempted_readers_exp()) + if (!rcu_preempted_readers_exp()) { local_irq_restore(flags); - else { + } else { rcu_initiate_boost(); local_irq_restore(flags); wait_event(sync_rcu_preempt_exp_wq, diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index c279ee920947..155fb129b641 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -408,8 +408,9 @@ rcu_torture_cb(struct rcu_head *p) if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { rp->rtort_mbtest = 0; rcu_torture_free(rp); - } else + } else { cur_ops->deferred_free(rp); + } } static int rcu_no_completed(void) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 117218a43724..f280e542e3e9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -892,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct if (rnp->qsmask & rdp->grpmask) { rdp->qs_pending = 1; rdp->passed_quiesce = 0; - } else + } else { rdp->qs_pending = 0; + } zero_cpu_stall_ticks(rdp); } } @@ -2130,9 +2131,9 @@ void synchronize_sched_expedited(void) put_online_cpus(); /* No joy, try again later. Or just synchronize_sched(). */ - if (trycount++ < 10) + if (trycount++ < 10) { udelay(trycount * num_online_cpus()); - else { + } else { synchronize_sched(); return; } diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a9194d5606c4..7f3244c0df01 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -390,8 +390,9 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->grphi, !!rnp->gp_tasks); rcu_report_unblock_qs_rnp(rnp, flags); - } else + } else { raw_spin_unlock_irqrestore(&rnp->lock, flags); + } #ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ @@ -757,9 +758,9 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) int must_wait = 0; raw_spin_lock_irqsave(&rnp->lock, flags); - if (list_empty(&rnp->blkd_tasks)) + if (list_empty(&rnp->blkd_tasks)) { raw_spin_unlock_irqrestore(&rnp->lock, flags); - else { + } else { rnp->exp_tasks = rnp->blkd_tasks.next; rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ must_wait = 1; @@ -803,9 +804,9 @@ void synchronize_rcu_expedited(void) * expedited grace period for us, just leave. */ while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { - if (trycount++ < 10) + if (trycount++ < 10) { udelay(trycount * num_online_cpus()); - else { + } else { synchronize_rcu(); return; } @@ -2093,8 +2094,9 @@ static void rcu_prepare_for_idle(int cpu) if (rcu_cpu_has_callbacks(cpu)) { trace_rcu_prep_idle("More callbacks"); invoke_rcu_core(); - } else + } else { trace_rcu_prep_idle("Callbacks drained"); + } } /* -- cgit v1.2.3 From 5cf05ad758c30d17ff23c2be346b5de982bc2121 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 17 May 2012 15:12:45 -0700 Subject: rcu: Fix broken strings in RCU's source code. Although the C language allows you to break strings across lines, doing this makes it hard for people to find the Linux kernel code corresponding to a given console message. This commit therefore fixes broken strings throughout RCU's source code. Suggested-by: Josh Triplett Suggested-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 3 +-- kernel/rcutorture.c | 33 ++++++++++++++------------------- kernel/rcutree_trace.c | 25 ++++++++++++------------- lib/list_debug.c | 6 ++---- 4 files changed, 29 insertions(+), 38 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c2c0d86dd3ac..115ead2b5155 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -427,8 +427,7 @@ extern int rcu_my_thread_group_empty(void); static inline void rcu_preempt_sleep_check(void) { rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), - "Illegal context switch in RCU read-side " - "critical section"); + "Illegal context switch in RCU read-side critical section"); } #else /* #ifdef CONFIG_PROVE_RCU */ static inline void rcu_preempt_sleep_check(void) diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 155fb129b641..25b15033c61f 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -49,8 +49,7 @@ #include MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Paul E. McKenney and " - "Josh Triplett "); +MODULE_AUTHOR("Paul E. McKenney and Josh Triplett "); static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ static int nfakewriters = 4; /* # fake writer threads */ @@ -1200,27 +1199,27 @@ rcu_torture_printk(char *page) } cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); cnt += sprintf(&page[cnt], - "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " - "rtmbe: %d rtbke: %ld rtbre: %ld " - "rtbf: %ld rtb: %ld nt: %ld " - "onoff: %ld/%ld:%ld/%ld " - "barrier: %ld/%ld:%ld", + "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", rcu_torture_current, rcu_torture_current_version, list_empty(&rcu_torture_freelist), atomic_read(&n_rcu_torture_alloc), atomic_read(&n_rcu_torture_alloc_fail), - atomic_read(&n_rcu_torture_free), + atomic_read(&n_rcu_torture_free)); + cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ", atomic_read(&n_rcu_torture_mberror), n_rcu_torture_boost_ktrerror, - n_rcu_torture_boost_rterror, + n_rcu_torture_boost_rterror); + cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ", n_rcu_torture_boost_failure, n_rcu_torture_boosts, - n_rcu_torture_timers, + n_rcu_torture_timers); + cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ", n_online_successes, n_online_attempts, n_offline_successes, - n_offline_attempts, + n_offline_attempts); + cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld", n_barrier_successes, n_barrier_attempts, n_rcu_torture_barrier_error); @@ -1462,8 +1461,7 @@ rcu_torture_shutdown(void *arg) delta = shutdown_time - jiffies_snap; if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_shutdown task: %lu " - "jiffies remaining\n", + "rcu_torture_shutdown task: %lu jiffies remaining\n", torture_type, delta); schedule_timeout_interruptible(delta); jiffies_snap = ACCESS_ONCE(jiffies); @@ -1515,8 +1513,7 @@ rcu_torture_onoff(void *arg) if (cpu_down(cpu) == 0) { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: " - "offlined %d\n", + "rcu_torture_onoff task: offlined %d\n", torture_type, cpu); n_offline_successes++; } @@ -1529,8 +1526,7 @@ rcu_torture_onoff(void *arg) if (cpu_up(cpu) == 0) { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: " - "onlined %d\n", + "rcu_torture_onoff task: onlined %d\n", torture_type, cpu); n_online_successes++; } @@ -1952,8 +1948,7 @@ rcu_torture_init(void) return -EINVAL; } if (cur_ops->fqs == NULL && fqs_duration != 0) { - printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero " - "fqs_duration, fqs disabled.\n"); + printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); fqs_duration = 0; } if (cur_ops->init) diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index a16ddbd6fdc4..abffb486e94e 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -218,8 +218,7 @@ static const struct file_operations rcudata_csv_fops = { static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp) { - seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu " - "j=%04x bt=%04x\n", + seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ", rnp->grplo, rnp->grphi, "T."[list_empty(&rnp->blkd_tasks)], "N."[!rnp->gp_tasks], @@ -227,11 +226,11 @@ static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp) "B."[!rnp->boost_tasks], convert_kthread_status(rnp->boost_kthread_status), rnp->n_tasks_boosted, rnp->n_exp_boosts, - rnp->n_normal_boosts, + rnp->n_normal_boosts); + seq_printf(m, "j=%04x bt=%04x\n", (int)(jiffies & 0xffff), (int)(rnp->boost_time & 0xffff)); - seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n", - " balk", + seq_printf(m, " balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n", rnp->n_balk_blkd_tasks, rnp->n_balk_exp_gp_tasks, rnp->n_balk_boost_tasks, @@ -287,11 +286,11 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) struct rcu_node *rnp; gpnum = rsp->gpnum; - seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x " - "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", + seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x ", rsp->name, rsp->completed, gpnum, rsp->fqs_state, (long)(rsp->jiffies_force_qs - jiffies), - (int)(jiffies & 0xffff), + (int)(jiffies & 0xffff)); + seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", rsp->n_force_qs, rsp->n_force_qs_ngp, rsp->n_force_qs - rsp->n_force_qs_ngp, rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); @@ -378,16 +377,16 @@ static const struct file_operations rcugp_fops = { static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) { - seq_printf(m, "%3d%cnp=%ld " - "qsp=%ld rpq=%ld cbr=%ld cng=%ld " - "gpc=%ld gps=%ld nf=%ld nn=%ld\n", + seq_printf(m, "%3d%cnp=%ld ", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' : ' ', - rdp->n_rcu_pending, + rdp->n_rcu_pending); + seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ", rdp->n_rp_qs_pending, rdp->n_rp_report_qs, rdp->n_rp_cb_ready, - rdp->n_rp_cpu_needs_gp, + rdp->n_rp_cpu_needs_gp); + seq_printf(m, "gpc=%ld gps=%ld nf=%ld nn=%ld\n", rdp->n_rp_gp_completed, rdp->n_rp_gp_started, rdp->n_rp_need_fqs, diff --git a/lib/list_debug.c b/lib/list_debug.c index 23a5e031cd8b..c24c2f7e296f 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -87,12 +87,10 @@ void __list_add_rcu(struct list_head *new, struct list_head *prev, struct list_head *next) { WARN(next->prev != prev, - "list_add_rcu corruption. next->prev should be " - "prev (%p), but was %p. (next=%p).\n", + "list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", prev, next->prev, next); WARN(prev->next != next, - "list_add_rcu corruption. prev->next should be " - "next (%p), but was %p. (prev=%p).\n", + "list_add_rcu corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", next, prev->next, prev); new->next = next; new->prev = prev; -- cgit v1.2.3 From c6a4ebb9ae30ead7684bce623955f74b17df496d Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 6 Jul 2012 23:56:00 +0200 Subject: MIPS: Provide a symbol for the legacy performance counter interrupt. Based on https://patchwork.linux-mips.org/patch/3576 - but this really deserves its own patchset and the symbol should also be used :) Signed-off-by: Ralf Baechle --- arch/mips/include/asm/irq.h | 1 + arch/mips/kernel/traps.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index fb698dc09bc9..78dbb8a86da2 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -136,6 +136,7 @@ extern void free_irqno(unsigned int irq); * IE7. Since R2 their number has to be read from the c0_intctl register. */ #define CP0_LEGACY_COMPARE_IRQ 7 +#define CP0_LEGACY_PERFCNT_IRQ 7 extern int cp0_compare_irq; extern int cp0_compare_irq_shift; diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 2d0c2a277f52..f985b7292cd9 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1597,7 +1597,7 @@ void __cpuinit per_cpu_trap_init(bool is_boot_cpu) cp0_perfcount_irq = -1; } else { cp0_compare_irq = CP0_LEGACY_COMPARE_IRQ; - cp0_compare_irq_shift = cp0_compare_irq; + cp0_compare_irq_shift = CP0_LEGACY_PERFCNT_IRQ; cp0_perfcount_irq = -1; } -- cgit v1.2.3 From f0b77f2c0eed1e37c96b9a995d5a45e9eb4aaca8 Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Fri, 6 Jul 2012 23:56:00 +0200 Subject: MIPS: Clean-up GIC and vectored interrupts. This change adds macros for routing of GIC interrupts for EIC and non-EIC hardware modes. Also added Malta GIC macros having to do with performance and timer interrupts. Signed-off-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3576/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/gic.h | 15 ++++++++++++++- arch/mips/include/asm/mips-boards/maltaint.h | 10 ++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/gic.h b/arch/mips/include/asm/gic.h index 86548da650e7..991b659e2548 100644 --- a/arch/mips/include/asm/gic.h +++ b/arch/mips/include/asm/gic.h @@ -206,7 +206,7 @@ #define GIC_VPE_EIC_SHADOW_SET_BASE 0x0100 #define GIC_VPE_EIC_SS(intr) \ - (GIC_EIC_SHADOW_SET_BASE + (4 * intr)) + (GIC_VPE_EIC_SHADOW_SET_BASE + (4 * intr)) #define GIC_VPE_EIC_VEC_BASE 0x0800 #define GIC_VPE_EIC_VEC(intr) \ @@ -330,6 +330,17 @@ struct gic_intr_map { #define GIC_FLAG_TRANSPARENT 0x02 }; +/* + * This is only used in EIC mode. This helps to figure out which + * shared interrupts we need to process when we get a vector interrupt. + */ +#define GIC_MAX_SHARED_INTR 0x5 +struct gic_shared_intr_map { + unsigned int num_shared_intr; + unsigned int intr_list[GIC_MAX_SHARED_INTR]; + unsigned int local_intr_mask; +}; + extern void gic_init(unsigned long gic_base_addr, unsigned long gic_addrspace_size, struct gic_intr_map *intrmap, unsigned int intrmap_size, unsigned int irqbase); @@ -338,5 +349,7 @@ extern unsigned int gic_get_int(void); extern void gic_send_ipi(unsigned int intr); extern unsigned int plat_ipi_call_int_xlate(unsigned int); extern unsigned int plat_ipi_resched_int_xlate(unsigned int); +extern void gic_bind_eic_interrupt(int irq, int set); +extern unsigned int gic_get_timer_pending(void); #endif /* _ASM_GICREGS_H */ diff --git a/arch/mips/include/asm/mips-boards/maltaint.h b/arch/mips/include/asm/mips-boards/maltaint.h index d11aa02a956a..5447d9fc4219 100644 --- a/arch/mips/include/asm/mips-boards/maltaint.h +++ b/arch/mips/include/asm/mips-boards/maltaint.h @@ -86,6 +86,16 @@ #define GIC_CPU_INT4 4 /* . */ #define GIC_CPU_INT5 5 /* Core Interrupt 5 */ +/* MALTA GIC local interrupts */ +#define GIC_INT_TMR (GIC_CPU_INT5) +#define GIC_INT_PERFCTR (GIC_CPU_INT5) + +/* GIC constants */ +/* Add 2 to convert non-eic hw int # to eic vector # */ +#define GIC_CPU_TO_VEC_OFFSET (2) +/* If we map an intr to pin X, GIC will actually generate vector X+1 */ +#define GIC_PIN_TO_VEC_OFFSET (1) + #define GIC_EXT_INTR(x) x /* External Interrupts used for IPI */ -- cgit v1.2.3 From 839efb4ffbfba74857e3411413f7ca53c8ff1925 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 6 Jul 2012 23:56:00 +0200 Subject: MIPS: MT: Fix indentation damage. Split off from https://patchwork.linux-mips.org/patch/3603/. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mipsmtregs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h index c9420aa97e32..e71ff4c317f2 100644 --- a/arch/mips/include/asm/mipsmtregs.h +++ b/arch/mips/include/asm/mipsmtregs.h @@ -48,7 +48,7 @@ #define CP0_VPECONF0 $1, 2 #define CP0_VPECONF1 $1, 3 #define CP0_YQMASK $1, 4 -#define CP0_VPESCHEDULE $1, 5 +#define CP0_VPESCHEDULE $1, 5 #define CP0_VPESCHEFBK $1, 6 #define CP0_TCSTATUS $2, 1 #define CP0_TCBIND $2, 2 -- cgit v1.2.3 From 113c62d9844d9037508fa156e47db1b5407a27c3 Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Fri, 6 Jul 2012 23:56:00 +0200 Subject: MIPS: Add support for the M14Kc core. [ralf@linux-mips.org: Fixed whitespace damage.] Signed-off-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3773/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cpu.h | 5 +++-- arch/mips/kernel/cpu-probe.c | 7 ++++++- arch/mips/mm/c-r4k.c | 1 + arch/mips/mm/tlbex.c | 2 ++ arch/mips/oprofile/common.c | 1 + arch/mips/oprofile/op_model_mipsxx.c | 4 ++++ 6 files changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index f9fa2a479dd0..c64910586b74 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -94,6 +94,7 @@ #define PRID_IMP_24KE 0x9600 #define PRID_IMP_74K 0x9700 #define PRID_IMP_1004K 0x9900 +#define PRID_IMP_M14KC 0x9c00 /* * These are the PRID's for when 23:16 == PRID_COMP_SIBYTE @@ -260,7 +261,7 @@ enum cpu_type_enum { */ CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K, CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350, - CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC, + CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC, CPU_M14KC, /* * MIPS64 class processors @@ -288,7 +289,7 @@ enum cpu_type_enum { #define MIPS_CPU_ISA_M64R2 0x00000100 #define MIPS_CPU_ISA_32BIT (MIPS_CPU_ISA_I | MIPS_CPU_ISA_II | \ - MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 ) + MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2) #define MIPS_CPU_ISA_64BIT (MIPS_CPU_ISA_III | MIPS_CPU_ISA_IV | \ MIPS_CPU_ISA_V | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 6ae7ce4ac63e..aaf39f3eaa51 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -4,7 +4,7 @@ * Copyright (C) xxxx the Anonymous * Copyright (C) 1994 - 2006 Ralf Baechle * Copyright (C) 2003, 2004 Maciej W. Rozycki - * Copyright (C) 2001, 2004 MIPS Inc. + * Copyright (C) 2001, 2004, 2011, 2012 MIPS Technologies, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -199,6 +199,7 @@ void __init check_wait(void) cpu_wait = rm7k_wait_irqoff; break; + case CPU_M14KC: case CPU_24K: case CPU_34K: case CPU_1004K: @@ -831,6 +832,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_74K; __cpu_name[cpu] = "MIPS 74Kc"; break; + case PRID_IMP_M14KC: + c->cputype = CPU_M14KC; + __cpu_name[cpu] = "MIPS M14Kc"; + break; case PRID_IMP_1004K: c->cputype = CPU_1004K; __cpu_name[cpu] = "MIPS 1004Kc"; diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 5109be96d98d..e56efd059189 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1051,6 +1051,7 @@ static void __cpuinit probe_pcache(void) case CPU_R14000: break; + case CPU_M14KC: case CPU_24K: case CPU_34K: case CPU_74K: diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 0bc485b3cd60..03eb0ef91580 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -9,6 +9,7 @@ * Copyright (C) 2005, 2007, 2008, 2009 Maciej W. Rozycki * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 2008, 2009 Cavium Networks, Inc. + * Copyright (C) 2011 MIPS Technologies, Inc. * * ... and the days got worse and worse and now you see * I've gone completly out of my mind. @@ -494,6 +495,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_R14000: case CPU_4KC: case CPU_4KEC: + case CPU_M14KC: case CPU_SB1: case CPU_SB1A: case CPU_4KSC: diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index d1f2d4c52d42..b6e378211a2c 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -78,6 +78,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) switch (current_cpu_type()) { case CPU_5KC: + case CPU_M14KC: case CPU_20KC: case CPU_24K: case CPU_25KF: diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c index baba3bcaa3c2..4d80a856048d 100644 --- a/arch/mips/oprofile/op_model_mipsxx.c +++ b/arch/mips/oprofile/op_model_mipsxx.c @@ -322,6 +322,10 @@ static int __init mipsxx_init(void) op_model_mipsxx_ops.num_counters = counters; switch (current_cpu_type()) { + case CPU_M14KC: + op_model_mipsxx_ops.cpu_type = "mips/M14Kc"; + break; + case CPU_20KC: op_model_mipsxx_ops.cpu_type = "mips/20K"; break; -- cgit v1.2.3 From 3da947b2693993d5f6138f005d2625e9387c9618 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 2 Jul 2012 01:29:55 +0000 Subject: ieee802154: verify packet size before trying to allocate it Currently when sending data over datagram, the send function will attempt to allocate any size passed on from the userspace. We should make sure that this size is checked and limited. We'll limit it to the MTU of the device, which is checked later anyway. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- net/ieee802154/dgram.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 6fbb2ad7bb6d..16705611589a 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -230,6 +230,12 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, mtu = dev->mtu; pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EINVAL; + goto out_dev; + } + hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(sk, hlen + tlen + size, @@ -258,12 +264,6 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, if (err < 0) goto out_skb; - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EINVAL; - goto out_skb; - } - skb->dev = dev; skb->sk = sk; skb->protocol = htons(ETH_P_IEEE802154); -- cgit v1.2.3 From acfa9e94e2f06f8911a1d2f257880af4ad945eef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Jul 2012 08:36:12 +0000 Subject: net: dont use __netdev_alloc_skb for bounce buffer commit a1c7fff7e1 (net: netdev_alloc_skb() use build_skb()) broke b44 on some 64bit machines. It appears b44 and b43 use __netdev_alloc_skb() instead of alloc_skb() for their bounce buffers. There is no need to add an extra NET_SKB_PAD reservation for bounce buffers : - In TX path, NET_SKB_PAD is useless - In RX path in b44, we force a copy of incoming frames if GFP_DMA allocations were needed. Reported-and-bisected-by: Stefan Bader Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 4 ++-- drivers/net/wireless/b43legacy/dma.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 46b8b7d81633..d09c6b583d17 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -656,7 +656,7 @@ static int b44_alloc_rx_skb(struct b44 *bp, int src_idx, u32 dest_idx_unmasked) dma_unmap_single(bp->sdev->dma_dev, mapping, RX_PKT_BUF_SZ, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); - skb = __netdev_alloc_skb(bp->dev, RX_PKT_BUF_SZ, GFP_ATOMIC|GFP_DMA); + skb = alloc_skb(RX_PKT_BUF_SZ, GFP_ATOMIC | GFP_DMA); if (skb == NULL) return -ENOMEM; mapping = dma_map_single(bp->sdev->dma_dev, skb->data, @@ -967,7 +967,7 @@ static netdev_tx_t b44_start_xmit(struct sk_buff *skb, struct net_device *dev) dma_unmap_single(bp->sdev->dma_dev, mapping, len, DMA_TO_DEVICE); - bounce_skb = __netdev_alloc_skb(dev, len, GFP_ATOMIC | GFP_DMA); + bounce_skb = alloc_skb(len, GFP_ATOMIC | GFP_DMA); if (!bounce_skb) goto err_out; diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c index f1f8bd09bd87..c8baf020c20f 100644 --- a/drivers/net/wireless/b43legacy/dma.c +++ b/drivers/net/wireless/b43legacy/dma.c @@ -1072,7 +1072,7 @@ static int dma_tx_fragment(struct b43legacy_dmaring *ring, meta->dmaaddr = map_descbuffer(ring, skb->data, skb->len, 1); /* create a bounce buffer in zone_dma on mapping failure. */ if (b43legacy_dma_mapping_error(ring, meta->dmaaddr, skb->len, 1)) { - bounce_skb = __dev_alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA); + bounce_skb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA); if (!bounce_skb) { ring->current_slot = old_top_slot; ring->used_slots = old_used_slots; -- cgit v1.2.3 From b94e52f62683dc0b00c6d1b58b80929a078c0fd5 Mon Sep 17 00:00:00 2001 From: Cloud Ren Date: Tue, 3 Jul 2012 16:51:48 +0000 Subject: atl1c: fix issue of transmit queue 0 timed out some people report atl1c could cause system hang with following kernel trace info: --------------------------------------- WARNING: at.../net/sched/sch_generic.c:258 dev_watchdog+0x1db/0x1d0() ... NETDEV WATCHDOG: eth0 (atl1c): transmit queue 0 timed out ... --------------------------------------- This is caused by netif_stop_queue calling when cable Link is down. So remove netif_stop_queue, because link_watch will take it over. Signed-off-by: xiong Cc: stable Signed-off-by: Cloud Ren Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 9cc15701101b..1f78b63d5efe 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -261,7 +261,6 @@ static void atl1c_check_link_status(struct atl1c_adapter *adapter) if ((phy_data & BMSR_LSTATUS) == 0) { /* link down */ netif_carrier_off(netdev); - netif_stop_queue(netdev); hw->hibernate = true; if (atl1c_reset_mac(hw) != 0) if (netif_msg_hw(adapter)) -- cgit v1.2.3 From 960fb66e520a405dde39ff883f17ff2669c13d85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Jul 2012 20:55:21 +0000 Subject: netem: add limitation to reordered packets Fix two netem bugs : 1) When a frame was dropped by tfifo_enqueue(), drop counter was incremented twice. 2) When reordering is triggered, we enqueue a packet without checking queue limit. This can OOM pretty fast when this is repeated enough, since skbs are orphaned, no socket limit can help in this situation. Signed-off-by: Eric Dumazet Cc: Mark Gordon Cc: Andreas Terzis Cc: Yuchung Cheng Cc: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a2a95aabf9c2..c412ad0d0308 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -331,29 +331,22 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche return PSCHED_NS2TICKS(ticks); } -static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) +static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct sk_buff_head *list = &sch->q; psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; - struct sk_buff *skb; - - if (likely(skb_queue_len(list) < sch->limit)) { - skb = skb_peek_tail(list); - /* Optimize for add at tail */ - if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) - return qdisc_enqueue_tail(nskb, sch); + struct sk_buff *skb = skb_peek_tail(list); - skb_queue_reverse_walk(list, skb) { - if (tnext >= netem_skb_cb(skb)->time_to_send) - break; - } + /* Optimize for add at tail */ + if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) + return __skb_queue_tail(list, nskb); - __skb_queue_after(list, skb, nskb); - sch->qstats.backlog += qdisc_pkt_len(nskb); - return NET_XMIT_SUCCESS; + skb_queue_reverse_walk(list, skb) { + if (tnext >= netem_skb_cb(skb)->time_to_send) + break; } - return qdisc_reshape_fail(nskb, sch); + __skb_queue_after(list, skb, nskb); } /* @@ -368,7 +361,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; struct sk_buff *skb2; - int ret; int count = 1; /* Random duplication */ @@ -419,6 +411,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } + if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) + return qdisc_reshape_fail(skb, sch); + + sch->qstats.backlog += qdisc_pkt_len(skb); + cb = netem_skb_cb(skb); if (q->gap == 0 || /* not doing reordering */ q->counter < q->gap - 1 || /* inside last reordering gap */ @@ -450,7 +447,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) cb->time_to_send = now + delay; ++q->counter; - ret = tfifo_enqueue(skb, sch); + tfifo_enqueue(skb, sch); } else { /* * Do re-ordering by putting one out of N packets at the front @@ -460,16 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->counter = 0; __skb_queue_head(&sch->q, skb); - sch->qstats.backlog += qdisc_pkt_len(skb); sch->qstats.requeues++; - ret = NET_XMIT_SUCCESS; - } - - if (ret != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - return ret; - } } return NET_XMIT_SUCCESS; -- cgit v1.2.3 From d4e41649434cd6db2e69783130cba81886dac97f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 4 Jul 2012 02:00:25 +0000 Subject: ixgbe: DCB and SR-IOV can not co-exist and will cause hangs DCB and SR-IOV cannot currently be enabled at the same time as the queueing schemes are incompatible. If they are both enabled it will result in Tx hangs since only the first Tx queue will be able to transmit any traffic. This simple fix for this is to block us from enabling TCs in ixgbe_setup_tc if SR-IOV is enabled. This change will be reverted once we can support SR-IOV and DCB coexistence. Signed-off-by: Alexander Duyck Acked-by: John Fastabend Tested-by: Phil Schmitt Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 18ca3bcadf0c..e242104ab471 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6647,6 +6647,11 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) return -EINVAL; } + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { + e_err(drv, "Enable failed, SR-IOV enabled\n"); + return -EINVAL; + } + /* Hardware supports up to 8 traffic classes */ if (tc > adapter->dcb_cfg.num_tcs.pg_tcs || (hw->mac.type == ixgbe_mac_82598EB && -- cgit v1.2.3 From b93984c9afacd4fe32b785d52a93660d91202b10 Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 4 Jul 2012 12:06:16 +0000 Subject: netdev/phy: Fixup lockdep warnings in mdio-mux.c With lockdep enabled we get: ============================================= [ INFO: possible recursive locking detected ] 3.4.4-Cavium-Octeon+ #313 Not tainted --------------------------------------------- kworker/u:1/36 is trying to acquire lock: (&bus->mdio_lock){+.+...}, at: [] mdio_mux_read+0x38/0xa0 but task is already holding lock: (&bus->mdio_lock){+.+...}, at: [] mdiobus_read+0x44/0x88 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&bus->mdio_lock); lock(&bus->mdio_lock); *** DEADLOCK *** May be due to missing lock nesting notation . . . This is a false positive, since we are indeed using 'nested' locking, we need to use mutex_lock_nested(). Now in theory we can stack multiple MDIO multiplexers, but that would require passing the nesting level (which is difficult to know) to mutex_lock_nested(). Instead we assume the simple case of a single level of nesting. Since these are only warning messages, it isn't so important to solve the general case. Signed-off-by: David Daney Signed-off-by: David S. Miller --- drivers/net/phy/mdio-mux.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 39ea0674dcde..5c120189ec86 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -46,7 +46,13 @@ static int mdio_mux_read(struct mii_bus *bus, int phy_id, int regnum) struct mdio_mux_parent_bus *pb = cb->parent; int r; - mutex_lock(&pb->mii_bus->mdio_lock); + /* In theory multiple mdio_mux could be stacked, thus creating + * more than a single level of nesting. But in practice, + * SINGLE_DEPTH_NESTING will cover the vast majority of use + * cases. We use it, instead of trying to handle the general + * case. + */ + mutex_lock_nested(&pb->mii_bus->mdio_lock, SINGLE_DEPTH_NESTING); r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data); if (r) goto out; @@ -71,7 +77,7 @@ static int mdio_mux_write(struct mii_bus *bus, int phy_id, int r; - mutex_lock(&pb->mii_bus->mdio_lock); + mutex_lock_nested(&pb->mii_bus->mdio_lock, SINGLE_DEPTH_NESTING); r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data); if (r) goto out; -- cgit v1.2.3 From b761c9b1f4f69eb53fb6147547a1ab25237a93b3 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Wed, 4 Jul 2012 23:28:40 +0000 Subject: cgroup: fix panic in netprio_cgroup we set max_prioidx to the first zero bit index of prioidx_map in function get_prioidx. So when we delete the low index netprio cgroup and adding a new netprio cgroup again,the max_prioidx will be set to the low index. when we set the high index cgroup's net_prio.ifpriomap,the function write_priomap will call update_netdev_tables to alloc memory which size is sizeof(struct netprio_map) + sizeof(u32) * (max_prioidx + 1), so the size of array that map->priomap point to is max_prioidx +1, which is low than what we actually need. fix this by adding check in get_prioidx,only set max_prioidx when max_prioidx low than the new prioidx. Signed-off-by: Gao feng Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 5b8aa2fae48b..aa907ed466ea 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -49,8 +49,9 @@ static int get_prioidx(u32 *prio) return -ENOSPC; } set_bit(prioidx, prioidx_map); + if (atomic_read(&max_prioidx) < prioidx) + atomic_set(&max_prioidx, prioidx); spin_unlock_irqrestore(&prioidx_map_lock, flags); - atomic_set(&max_prioidx, prioidx); *prio = prioidx; return 0; } -- cgit v1.2.3 From 6fecd35d4cd79fc75e8290abb86734c18500d2a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 5 Jul 2012 01:13:33 +0000 Subject: net: qmi_wwan: add ZTE MF60 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding a device with limited QMI support. It does not support normal QMI_WDS commands for connection management. Instead, sending a QMI_CTL SET_INSTANCE_ID command is required to enable the network interface: 01 0f 00 00 00 00 00 00 20 00 04 00 01 01 00 00 A number of QMI_DMS and QMI_NAS commands are also supported for optional device management. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b01960fcfbc9..a051cedd64bd 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -346,6 +346,15 @@ static const struct driver_info qmi_wwan_force_int1 = { .data = BIT(1), /* interface whitelist bitmap */ }; +static const struct driver_info qmi_wwan_force_int2 = { + .description = "Qualcomm WWAN/QMI device", + .flags = FLAG_WWAN, + .bind = qmi_wwan_bind_shared, + .unbind = qmi_wwan_unbind_shared, + .manage_power = qmi_wwan_manage_power, + .data = BIT(2), /* interface whitelist bitmap */ +}; + static const struct driver_info qmi_wwan_force_int3 = { .description = "Qualcomm WWAN/QMI device", .flags = FLAG_WWAN, @@ -498,6 +507,15 @@ static const struct usb_device_id products[] = { .bInterfaceProtocol = 0xff, .driver_info = (unsigned long)&qmi_wwan_force_int4, }, + { /* ZTE MF60 */ + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x19d2, + .idProduct = 0x1402, + .bInterfaceClass = 0xff, + .bInterfaceSubClass = 0xff, + .bInterfaceProtocol = 0xff, + .driver_info = (unsigned long)&qmi_wwan_force_int2, + }, { /* Sierra Wireless MC77xx in QMI mode */ .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1199, -- cgit v1.2.3 From 054581e6c1eb314d54d4747fba545e9802be29da Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 5 Jul 2012 14:21:55 +0000 Subject: cnic: Don't use netdev->base_addr commit c0357e975afdbbedab5c662d19bef865f02adc17 bnx2: stop using net_device.{base_addr, irq}. removed netdev->base_addr so we need to update cnic to get the MMIO base address from pci_resource_start(). Otherwise, mmap of the uio device will fail. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/cnic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index c95e7b5e2b85..3c95065e0def 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -1053,12 +1053,13 @@ static int cnic_init_uio(struct cnic_dev *dev) uinfo = &udev->cnic_uinfo; - uinfo->mem[0].addr = dev->netdev->base_addr; + uinfo->mem[0].addr = pci_resource_start(dev->pcidev, 0); uinfo->mem[0].internal_addr = dev->regview; - uinfo->mem[0].size = dev->netdev->mem_end - dev->netdev->mem_start; uinfo->mem[0].memtype = UIO_MEM_PHYS; if (test_bit(CNIC_F_BNX2_CLASS, &dev->flags)) { + uinfo->mem[0].size = MB_GET_CID_ADDR(TX_TSS_CID + + TX_MAX_TSS_RINGS + 1); uinfo->mem[1].addr = (unsigned long) cp->status_blk.gen & PAGE_MASK; if (cp->ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) @@ -1068,6 +1069,8 @@ static int cnic_init_uio(struct cnic_dev *dev) uinfo->name = "bnx2_cnic"; } else if (test_bit(CNIC_F_BNX2X_CLASS, &dev->flags)) { + uinfo->mem[0].size = pci_resource_len(dev->pcidev, 0); + uinfo->mem[1].addr = (unsigned long) cp->bnx2x_def_status_blk & PAGE_MASK; uinfo->mem[1].size = sizeof(*cp->bnx2x_def_status_blk); -- cgit v1.2.3 From a73f89a61f92b364f0b4a3be412b5b70553afc23 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 29 Jun 2012 09:42:28 +0000 Subject: netfilter: ipset: timeout fixing bug broke SET target special timeout value The patch "127f559 netfilter: ipset: fix timeout value overflow bug" broke the SET target when no timeout was specified. Reported-by: Jean-Philippe Menil Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_set.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 035960ec5cb9..c6f7db720d84 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -16,6 +16,7 @@ #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -310,7 +311,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) info->del_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ - if (add_opt.timeout > UINT_MAX/MSEC_PER_SEC) + if (add_opt.timeout != IPSET_NO_TIMEOUT && + add_opt.timeout > UINT_MAX/MSEC_PER_SEC) add_opt.timeout = UINT_MAX/MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); -- cgit v1.2.3 From 6bd0405bb4196b44f1acb7a58f11382cdaf6f7f0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Jul 2012 15:42:10 +0200 Subject: netfilter: nf_ct_ecache: fix crash with multiple containers, one shutting down Hans reports that he's still hitting: BUG: unable to handle kernel NULL pointer dereference at 000000000000027c IP: [] netlink_has_listeners+0xb/0x60 PGD 0 Oops: 0000 [#3] PREEMPT SMP CPU 0 It happens when adding a number of containers with do: nfct_query(h, NFCT_Q_CREATE, ct); and most likely one namespace shuts down. this problem was supposed to be fixed by: 70e9942 netfilter: nf_conntrack: make event callback registration per-netns Still, it was missing one rcu_access_pointer to check if the callback is set or not. Reported-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index a88fb6939387..e1ce1048fe5f 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -78,7 +78,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *e; - if (net->ct.nf_conntrack_event_cb == NULL) + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) return; e = nf_ct_ecache_find(ct); -- cgit v1.2.3 From 0e050923a797c1fc46ccc1e5182fd3090f33a75d Mon Sep 17 00:00:00 2001 From: Frank Kunz Date: Thu, 5 Jul 2012 22:32:49 +0200 Subject: HID: add Sennheiser BTD500USB device support The Sennheiser BTD500USB composit device requires the HID_QUIRK_NOGET flag to be set for working proper. Without the flag the device crashes during hid intialization. Signed-off-by: Frank Kunz Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index d1cdd2d28409..fc0c68e4b9ed 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -653,6 +653,9 @@ #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE 0x0001 #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE 0x0600 +#define USB_VENDOR_ID_SENNHEISER 0x1395 +#define USB_DEVICE_ID_SENNHEISER_BTD500USB 0x002c + #define USB_VENDOR_ID_SIGMA_MICRO 0x1c4f #define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD 0x0002 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 0597ee604f6e..903eef3d3e10 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -76,6 +76,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_1, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_2, HID_QUIRK_NOGET }, -- cgit v1.2.3 From 472dd35ccbd999a6cb2994c318ca16edf65c4359 Mon Sep 17 00:00:00 2001 From: Thomas Huehn Date: Fri, 29 Jun 2012 06:26:27 -0700 Subject: mac80211: correct size the argument to kzalloc in minstrel_ht msp has type struct minstrel_ht_sta_priv not struct minstrel_ht_sta. (This incorporates the fixup originally posted as "mac80211: fix kzalloc memory corruption introduced in minstrel_ht". -- JWL) Reported-by: Fengguang Wu Reported-by: Dan Carpenter Signed-off-by: Thomas Huehn Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rc80211_minstrel_ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 2d1acc6c5445..f9e51ef8dfa2 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -809,7 +809,7 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) max_rates = sband->n_bitrates; } - msp = kzalloc(sizeof(struct minstrel_ht_sta), gfp); + msp = kzalloc(sizeof(*msp), gfp); if (!msp) return NULL; -- cgit v1.2.3 From 147f20e316f3949f3f5ffe6c8658e9fe1c6ceb23 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 30 Jun 2012 11:56:47 +0200 Subject: NFC: Prevent NULL deref when getting socket name llcp_sock_getname can be called without a device attached to the nfc_llcp_sock. This would lead to the following BUG: [ 362.341807] BUG: unable to handle kernel NULL pointer dereference at (null) [ 362.341815] IP: [] llcp_sock_getname+0x75/0xc0 [ 362.341818] PGD 31b35067 PUD 30631067 PMD 0 [ 362.341821] Oops: 0000 [#627] PREEMPT SMP DEBUG_PAGEALLOC [ 362.341826] CPU 3 [ 362.341827] Pid: 7816, comm: trinity-child55 Tainted: G D W 3.5.0-rc4-next-20120628-sasha-00005-g9f23eb7 #479 [ 362.341831] RIP: 0010:[] [] llcp_sock_getname+0x75/0xc0 [ 362.341832] RSP: 0018:ffff8800304fde88 EFLAGS: 00010286 [ 362.341834] RAX: 0000000000000000 RBX: ffff880033cb8000 RCX: 0000000000000001 [ 362.341835] RDX: ffff8800304fdec4 RSI: ffff8800304fdec8 RDI: ffff8800304fdeda [ 362.341836] RBP: ffff8800304fdea8 R08: 7ebcebcb772b7ffb R09: 5fbfcb9c35bdfd53 [ 362.341838] R10: 4220020c54326244 R11: 0000000000000246 R12: ffff8800304fdec8 [ 362.341839] R13: ffff8800304fdec4 R14: ffff8800304fdec8 R15: 0000000000000044 [ 362.341841] FS: 00007effa376e700(0000) GS:ffff880035a00000(0000) knlGS:0000000000000000 [ 362.341843] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 362.341844] CR2: 0000000000000000 CR3: 0000000030438000 CR4: 00000000000406e0 [ 362.341851] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 362.341856] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 362.341858] Process trinity-child55 (pid: 7816, threadinfo ffff8800304fc000, task ffff880031270000) [ 362.341858] Stack: [ 362.341862] ffff8800304fdea8 ffff880035156780 0000000000000000 0000000000001000 [ 362.341865] ffff8800304fdf78 ffffffff83183b40 00000000304fdec8 0000006000000000 [ 362.341868] ffff8800304f0027 ffffffff83729649 ffff8800304fdee8 ffff8800304fdf48 [ 362.341869] Call Trace: [ 362.341874] [] sys_getpeername+0xa0/0x110 [ 362.341877] [] ? _raw_spin_unlock_irq+0x59/0x80 [ 362.341882] [] ? do_setitimer+0x23b/0x290 [ 362.341886] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 362.341889] [] system_call_fastpath+0x16/0x1b [ 362.341921] Code: 84 00 00 00 00 00 b8 b3 ff ff ff 48 85 db 74 54 66 41 c7 04 24 27 00 49 8d 7c 24 12 41 c7 45 00 60 00 00 00 48 8b 83 28 05 00 00 <8b> 00 41 89 44 24 04 0f b6 83 41 05 00 00 41 88 44 24 10 0f b6 [ 362.341924] RIP [] llcp_sock_getname+0x75/0xc0 [ 362.341925] RSP [ 362.341926] CR2: 0000000000000000 [ 362.341928] ---[ end trace 6d450e935ee18bf3 ]--- Signed-off-by: Sasha Levin Signed-off-by: John W. Linville --- net/nfc/llcp/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 17a707db40eb..e06d458fc719 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -292,7 +292,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *addr, pr_debug("%p\n", sk); - if (llcp_sock == NULL) + if (llcp_sock == NULL || llcp_sock->dev == NULL) return -EBADFD; addr->sa_family = AF_NFC; -- cgit v1.2.3 From 10a9109f2705fdc3caa94d768b2559587a9a050c Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 2 Jul 2012 14:42:03 +0300 Subject: mac80211: destroy assoc_data correctly if assoc fails If association failed due to internal error (e.g. no supported rates IE), we call ieee80211_destroy_assoc_data() with assoc=true, while we actually reject the association. This results in the BSSID not being zeroed out. After passing assoc=false, we no longer have to call sta_info_destroy_addr() explicitly. While on it, move the "associated" message after the assoc_success check. Cc: stable@vger.kernel.org [3.4+] Signed-off-by: Eliad Peller Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a4bb856de08f..0db5d34a06b6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2174,15 +2174,13 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->sa, status_code); ieee80211_destroy_assoc_data(sdata, false); } else { - printk(KERN_DEBUG "%s: associated\n", sdata->name); - if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ - ieee80211_destroy_assoc_data(sdata, true); - sta_info_destroy_addr(sdata, mgmt->bssid); + ieee80211_destroy_assoc_data(sdata, false); cfg80211_put_bss(*bss); return RX_MGMT_CFG80211_ASSOC_TIMEOUT; } + printk(KERN_DEBUG "%s: associated\n", sdata->name); /* * destroy assoc_data afterwards, as otherwise an idle -- cgit v1.2.3 From b3190466b0b6d336eddd10319c64a3ce3029b3ff Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Tue, 3 Jul 2012 15:53:13 -0700 Subject: mwifiex: fix Coverity SCAN CID 709078: Resource leak (RESOURCE_LEAK) > *. CID 709078: Resource leak (RESOURCE_LEAK) > - drivers/net/wireless/mwifiex/cfg80211.c, line: 935 > Assigning: "bss_cfg" = storage returned from "kzalloc(132UL, 208U)" > - but was not free > drivers/net/wireless/mwifiex/cfg80211.c:935 Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/cfg80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index ce61b6fae1c9..5c7fd185373c 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -958,6 +958,7 @@ static int mwifiex_cfg80211_start_ap(struct wiphy *wiphy, case NL80211_HIDDEN_SSID_ZERO_CONTENTS: /* firmware doesn't support this type of hidden SSID */ default: + kfree(bss_cfg); return -EINVAL; } -- cgit v1.2.3 From efd821182cec8c92babef6e00a95066d3252fda4 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 4 Jul 2012 13:10:02 +0200 Subject: rt2x00usb: fix indexes ordering on RX queue kick On rt2x00_dmastart() we increase index specified by Q_INDEX and on rt2x00_dmadone() we increase index specified by Q_INDEX_DONE. So entries between Q_INDEX_DONE and Q_INDEX are those we currently process in the hardware. Entries between Q_INDEX and Q_INDEX_DONE are those we can submit to the hardware. According to that fix rt2x00usb_kick_queue(), as we need to submit RX entries that are not processed by the hardware. It worked before only for empty queue, otherwise was broken. Note that for TX queues indexes ordering are ok. We need to kick entries that have filled skb, but was not submitted to the hardware, i.e. started from Q_INDEX_DONE and have ENTRY_DATA_PENDING bit set. From practical standpoint this fixes RX queue stall, usually reproducible in AP mode, like for example reported here: https://bugzilla.redhat.com/show_bug.cgi?id=828824 Reported-and-tested-by: Franco Miceli Reported-and-tested-by: Tom Horsley Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c index d357d1ed92f6..74ecc33fdd90 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c @@ -436,8 +436,8 @@ void rt2x00usb_kick_queue(struct data_queue *queue) case QID_RX: if (!rt2x00queue_full(queue)) rt2x00queue_for_each_entry(queue, - Q_INDEX_DONE, Q_INDEX, + Q_INDEX_DONE, NULL, rt2x00usb_kick_rx_entry); break; -- cgit v1.2.3 From c2ca7d92ed4bbd779516beb6eb226e19f7f7ab0f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 4 Jul 2012 13:20:20 +0200 Subject: iwlegacy: always monitor for stuck queues This is iwlegacy version of: commit 342bbf3fee2fa9a18147e74b2e3c4229a4564912 Author: Johannes Berg Date: Sun Mar 4 08:50:46 2012 -0800 iwlwifi: always monitor for stuck queues If we only monitor while associated, the following can happen: - we're associated, and the queue stuck check runs, setting the queue "touch" time to X - we disassociate, stopping the monitoring, which leaves the time set to X - almost 2s later, we associate, and enqueue a frame - before the frame is transmitted, we monitor for stuck queues, and find the time set to X, although it is now later than X + 2000ms, so we decide that the queue is stuck and erroneously restart the device Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/iwlegacy/common.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index cbf2dc18341f..5d4807c2b56d 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -4767,14 +4767,12 @@ il_bg_watchdog(unsigned long data) return; /* monitor and check for other stuck queues */ - if (il_is_any_associated(il)) { - for (cnt = 0; cnt < il->hw_params.max_txq_num; cnt++) { - /* skip as we already checked the command queue */ - if (cnt == il->cmd_queue) - continue; - if (il_check_stuck_queue(il, cnt)) - return; - } + for (cnt = 0; cnt < il->hw_params.max_txq_num; cnt++) { + /* skip as we already checked the command queue */ + if (cnt == il->cmd_queue) + continue; + if (il_check_stuck_queue(il, cnt)) + return; } mod_timer(&il->watchdog, -- cgit v1.2.3 From b48d96652626b315229b1b82c6270eead6a77a6d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 4 Jul 2012 13:59:08 +0200 Subject: iwlegacy: don't mess up the SCD when removing a key When we remove a key, we put a key index which was supposed to tell the fw that we are actually removing the key. But instead the fw took that index as a valid index and messed up the SRAM of the device. This memory corruption on the device mangled the data of the SCD. The impact on the user is that SCD queue 2 got stuck after having removed keys. Reported-by: Paul Bolle Cc: stable@vger.kernel.org Signed-off-by: Emmanuel Grumbach Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/iwlegacy/4965-mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 509301a5e7e2..ff5d689e13f3 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -3405,7 +3405,7 @@ il4965_remove_dynamic_key(struct il_priv *il, return 0; } - if (il->stations[sta_id].sta.key.key_offset == WEP_INVALID_OFFSET) { + if (il->stations[sta_id].sta.key.key_flags & STA_KEY_FLG_INVALID) { IL_WARN("Removing wrong key %d 0x%x\n", keyconf->keyidx, key_flags); spin_unlock_irqrestore(&il->sta_lock, flags); @@ -3420,7 +3420,7 @@ il4965_remove_dynamic_key(struct il_priv *il, memset(&il->stations[sta_id].sta.key, 0, sizeof(struct il4965_keyinfo)); il->stations[sta_id].sta.key.key_flags = STA_KEY_FLG_NO_ENC | STA_KEY_FLG_INVALID; - il->stations[sta_id].sta.key.key_offset = WEP_INVALID_OFFSET; + il->stations[sta_id].sta.key.key_offset = keyconf->hw_key_idx; il->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK; il->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; -- cgit v1.2.3 From 8e83989106562326bfd6aaf92174fe138efd026b Mon Sep 17 00:00:00 2001 From: Deepak Sikri Date: Sun, 8 Jul 2012 21:14:45 +0000 Subject: stmmac: Fix for nfs hang on multiple reboot It was observed that during multiple reboots nfs hangs. The status of receive descriptors shows that all the descriptors were in control of CPU, and none were assigned to DMA. Also the DMA status register confirmed that the Rx buffer is unavailable. This patch adds the fix for the same by adding the memory barriers to ascertain that the all instructions before enabling the Rx or Tx DMA are completed which involves the proper setting of the ownership bit in DMA descriptors. Signed-off-by: Deepak Sikri Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 51b3b68528ee..ea3003edde18 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1212,6 +1212,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion); wmb(); priv->hw->desc->set_tx_owner(desc); + wmb(); } /* Interrupt on completition only for the latest segment */ @@ -1227,6 +1228,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* To avoid raise condition */ priv->hw->desc->set_tx_owner(first); + wmb(); priv->cur_tx++; @@ -1290,6 +1292,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv) } wmb(); priv->hw->desc->set_rx_owner(p + entry); + wmb(); } } -- cgit v1.2.3 From 684901a6df1fb91fc9a2bdb89ffbebb241428d78 Mon Sep 17 00:00:00 2001 From: Deepak Sikri Date: Sun, 8 Jul 2012 21:14:46 +0000 Subject: stmmac: Fix for higher mtu size handling For the higher mtu sizes requiring the buffer size greater than 8192, the buffers are sent or received using multiple dma descriptors/ same descriptor with option of multi buffer handling. It was observed during tests that the driver was missing on data packets during the normal ping operations if the data buffers being used catered to jumbo frame handling. The memory barrriers are added in between preparation of dma descriptors in the jumbo frame handling path to ensure all instructions before enabling the dma are complete. Signed-off-by: Deepak Sikri Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index fb8377da1687..4b785e10f2ed 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -51,7 +51,7 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des3 = desc->des2 + BUF_SIZE_4KiB; priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum); - + wmb(); entry = (++priv->cur_tx) % txsize; desc = priv->dma_tx + entry; @@ -59,6 +59,7 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) len, DMA_TO_DEVICE); desc->des3 = desc->des2 + BUF_SIZE_4KiB; priv->hw->desc->prepare_tx_desc(desc, 0, len, csum); + wmb(); priv->hw->desc->set_tx_owner(desc); priv->tx_skbuff[entry] = NULL; } else { -- cgit v1.2.3 From a64d49c3dd504b685f9742a2f3dcb11fb8e4345f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jul 2012 10:51:45 +0000 Subject: bonding: Manage /proc/net/bonding/ entries from the netdev events It was recently reported that moving a bonding device between network namespaces causes warnings from /proc. It turns out after the move we were trying to add and to remove the /proc/net/bonding entries from the wrong network namespace. Move the bonding /proc registration code into the NETDEV_REGISTER and NETDEV_UNREGISTER events where the proc registration and unregistration will always happen at the right time. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b9c2ae62166d..2ee76993f052 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3227,6 +3227,12 @@ static int bond_master_netdev_event(unsigned long event, switch (event) { case NETDEV_CHANGENAME: return bond_event_changename(event_bond); + case NETDEV_UNREGISTER: + bond_remove_proc_entry(event_bond); + break; + case NETDEV_REGISTER: + bond_create_proc_entry(event_bond); + break; default: break; } @@ -4411,8 +4417,6 @@ static void bond_uninit(struct net_device *bond_dev) bond_work_cancel_all(bond); - bond_remove_proc_entry(bond); - bond_debug_unregister(bond); __hw_addr_flush(&bond->mc_list); @@ -4814,7 +4818,6 @@ static int bond_init(struct net_device *bond_dev) bond_set_lockdep_class(bond_dev); - bond_create_proc_entry(bond); list_add_tail(&bond->bond_list, &bn->dev_list); bond_prepare_sysfs_group(bond); -- cgit v1.2.3 From 96ca7ffe748bf91f851e6aa4479aa11c8b1122ba Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jul 2012 10:52:43 +0000 Subject: bonding: debugfs and network namespaces are incompatible The bonding debugfs support has been broken in the presence of network namespaces since it has been added. The debugfs support does not handle multiple bonding devices with the same name in different network namespaces. I haven't had any bug reports, and I'm not interested in getting any. Disable the debugfs support when network namespaces are enabled. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/net/bonding/bond_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c index 3680aa251dea..2cf084eb9d52 100644 --- a/drivers/net/bonding/bond_debugfs.c +++ b/drivers/net/bonding/bond_debugfs.c @@ -6,7 +6,7 @@ #include "bonding.h" #include "bond_alb.h" -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_NET_NS) #include #include -- cgit v1.2.3 From 91c68ce2b26319248a32d7baa1226f819d283758 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Jul 2012 21:45:10 +0000 Subject: net: cgroup: fix out of bounds accesses dev->priomap is allocated by extend_netdev_table() called from update_netdev_tables(). And this is only called if write_priomap() is called. But if write_priomap() is not called, it seems we can have out of bounds accesses in cgrp_destroy(), read_priomap() & skb_update_prio() With help from Gao Feng Signed-off-by: Eric Dumazet Cc: Neil Horman Cc: Gao feng Acked-by: Gao feng Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- net/core/netprio_cgroup.c | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 84f01ba81a34..0f28a9e0b8ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2444,8 +2444,12 @@ static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); - if ((!skb->priority) && (skb->sk) && map) - skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx]; + if (!skb->priority && skb->sk && map) { + unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + + if (prioidx < map->priomap_len) + skb->priority = map->priomap[prioidx]; + } } #else #define skb_update_prio(skb) diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index aa907ed466ea..3e953eaddbfc 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -142,7 +142,7 @@ static void cgrp_destroy(struct cgroup *cgrp) rtnl_lock(); for_each_netdev(&init_net, dev) { map = rtnl_dereference(dev->priomap); - if (map) + if (map && cs->prioidx < map->priomap_len) map->priomap[cs->prioidx] = 0; } rtnl_unlock(); @@ -166,7 +166,7 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft, rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { map = rcu_dereference(dev->priomap); - priority = map ? map->priomap[prioidx] : 0; + priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0; cb->fill(cb, dev->name, priority); } rcu_read_unlock(); -- cgit v1.2.3 From 1b9faf5e66bae8428a4ccdc1447d5399d1014581 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Jul 2012 01:37:38 +0000 Subject: drivers/isdn/mISDN/stack.c: remove invalid reference to list iterator variable If list_for_each_entry, etc complete a traversal of the list, the iterator variable ends up pointing to an address at an offset from the list head, and not a meaningful structure. Thus this value should not be used after the end of the iterator. The dereferences are just deleted from the debugging statement. This problem was found using Coccinelle (http://coccinelle.lip6.fr/). Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/isdn/mISDN/stack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 1a0ae4445ff2..5f21f629b7ae 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -135,8 +135,8 @@ send_layer2(struct mISDNstack *st, struct sk_buff *skb) skb = NULL; else if (*debug & DEBUG_SEND_ERR) printk(KERN_DEBUG - "%s ch%d mgr prim(%x) addr(%x) err %d\n", - __func__, ch->nr, hh->prim, ch->addr, ret); + "%s mgr prim(%x) err %d\n", + __func__, hh->prim, ret); } out: mutex_unlock(&st->lmutex); -- cgit v1.2.3 From cae296c42c94a27e0abdea96eb762752d1ba4908 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Jul 2012 01:37:39 +0000 Subject: net/rxrpc/ar-peer.c: remove invalid reference to list iterator variable If list_for_each_entry, etc complete a traversal of the list, the iterator variable ends up pointing to an address at an offset from the list head, and not a meaningful structure. Thus this value should not be used after the end of the iterator. This seems to be a copy-paste bug from a previous debugging message, and so the meaningless value is just deleted. This problem was found using Coccinelle (http://coccinelle.lip6.fr/). Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/rxrpc/ar-peer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index 2754f098d436..bebaa43484bc 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -229,7 +229,7 @@ found_UDP_peer: return peer; new_UDP_peer: - _net("Rx UDP DGRAM from NEW peer %d", peer->debug_id); + _net("Rx UDP DGRAM from NEW peer"); read_unlock_bh(&rxrpc_peer_lock); _leave(" = -EBUSY [new]"); return ERR_PTR(-EBUSY); -- cgit v1.2.3 From 022f09784b85396b4ceba954ce28e50de4882281 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Jul 2012 01:37:43 +0000 Subject: drivers/net/ethernet/broadcom/cnic.c: remove invalid reference to list iterator variable If list_for_each_entry, etc complete a traversal of the list, the iterator variable ends up pointing to an address at an offset from the list head, and not a meaningful structure. Thus this value should not be used after the end of the iterator. There does not seem to be a meaningful value to provide to netdev_warn. Replace with pr_warn, since pr_err is used elsewhere. This problem was found using Coccinelle (http://coccinelle.lip6.fr/). Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/cnic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 3c95065e0def..2c89d17cbb29 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -534,7 +534,8 @@ int cnic_unregister_driver(int ulp_type) } if (atomic_read(&ulp_ops->ref_count) != 0) - netdev_warn(dev->netdev, "Failed waiting for ref count to go to zero\n"); + pr_warn("%s: Failed waiting for ref count to go to zero\n", + __func__); return 0; out_unlock: -- cgit v1.2.3 From 313b037cf054ec908de92fb4c085403ffd7420d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Jul 2012 11:45:13 +0000 Subject: gianfar: fix potential sk_wmem_alloc imbalance commit db83d136d7f753 (gianfar: Fix missing sock reference when processing TX time stamps) added a potential sk_wmem_alloc imbalance If the new skb has a different truesize than old one, we can get a negative sk_wmem_alloc once new skb is orphaned at TX completion. Now we no longer early orphan skbs in dev_hard_start_xmit(), this probably can lead to fatal bugs. Signed-off-by: Eric Dumazet Tested-by: Paul Gortmaker Cc: Manfred Rudigier Cc: Claudiu Manoil Cc: Jiajun Wu Cc: Andy Fleming Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index f2db8fca46a1..ab1d80ff0791 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2063,10 +2063,9 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } - /* Steal sock reference for processing TX time stamps */ - swap(skb_new->sk, skb->sk); - swap(skb_new->destructor, skb->destructor); - kfree_skb(skb); + if (skb->sk) + skb_set_owner_w(skb_new, skb->sk); + consume_skb(skb); skb = skb_new; } -- cgit v1.2.3 From 1ba9a294141b106b7247649a5c3372d8284eca80 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 6 Jul 2012 15:07:48 +0100 Subject: x86/mm/mtrr: Fix alignment determination in range_to_mtrr() With the variable operated on being of "unsigned long" type, neither ffs() nor fls() are suitable to use on them, as those truncate their arguments to 32 bits. Using __ffs() and __fls() respectively at once eliminates the need to subtract 1 from their results. Additionally, with the alignment value subsequently used as a shift count, it must be enforced to be less than BITS_PER_LONG (and on 64-bit there's no need for it to be any smaller). Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Yinghai Lu Link: http://lkml.kernel.org/r/4FF70D54020000780008E179@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index bdda2e6c673b..35ffda5d0727 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -258,11 +258,11 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, /* Compute the maximum size with which we can make a range: */ if (range_startk) - max_align = ffs(range_startk) - 1; + max_align = __ffs(range_startk); else - max_align = 32; + max_align = BITS_PER_LONG - 1; - align = fls(range_sizek) - 1; + align = __fls(range_sizek); if (align > max_align) align = max_align; -- cgit v1.2.3 From a7101d152665817bf7cafc47e7f5dcb1f54664fe Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 6 Jul 2012 15:20:35 +0100 Subject: x86/mm/mtrr: Slightly simplify print_mtrr_state() high_width can be easily calculated in a single expression when making use of __ffs64(). Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Yinghai Lu Link: http://lkml.kernel.org/r/4FF71053020000780008E1B5@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 75772ae6c65f..e9fe907cd249 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -361,11 +361,7 @@ static void __init print_mtrr_state(void) } pr_debug("MTRR variable ranges %sabled:\n", mtrr_state.enabled & 2 ? "en" : "dis"); - if (size_or_mask & 0xffffffffUL) - high_width = ffs(size_or_mask & 0xffffffffUL) - 1; - else - high_width = ffs(size_or_mask>>32) + 32 - 1; - high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; + high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) -- cgit v1.2.3 From efc73f4bbc238d4f579fb612c04c8e1dd8a82979 Mon Sep 17 00:00:00 2001 From: Amir Hanania Date: Mon, 9 Jul 2012 20:47:19 +0000 Subject: net: Fix memory leak - vlan_info struct In driver reload test there is a memory leak. The structure vlan_info was not freed when the driver was removed. It was not released since the nr_vids var is one after last vlan was removed. The nr_vids is one, since vlan zero is added to the interface when the interface is being set, but the vlan zero is not deleted at unregister. Fix - delete vlan zero when we unregister the device. Signed-off-by: Amir Hanania Acked-by: John Fastabend Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/8021q/vlan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 6089f0cf23b4..9096bcb08132 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -403,6 +403,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, break; case NETDEV_DOWN: + if (dev->features & NETIF_F_HW_VLAN_FILTER) + vlan_vid_del(dev, 0); + /* Put all VLANs for this dev in the down state too. */ for (i = 0; i < VLAN_N_VID; i++) { vlandev = vlan_group_get_device(grp, i); -- cgit v1.2.3 From b28ba72665356438e3a6e3be365c3c3071496840 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Jul 2012 10:03:41 +0000 Subject: IPoIB: fix skb truesize underestimatiom Or Gerlitz reported triggering of WARN_ON_ONCE(delta < len); in skb_try_coalesce() This warning tracks drivers that incorrectly set skb->truesize IPoIB indeed allocates a full page to store a fragment, but only accounts in skb->truesize the used part of the page (frame length) This patch fixes skb truesize underestimation, and also fixes a performance issue, because RX skbs have not enough tailroom to allow IP and TCP stacks to pull their header in skb linear part without an expensive call to pskb_expand_head() Signed-off-by: Eric Dumazet Reported-by: Or Gerlitz Cc: Erez Shitrit Cc: Shlomo Pongartz Cc: Roland Dreier Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5c1bc995e560..f10221f40803 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -123,7 +123,7 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv, skb_frag_size_set(frag, size); skb->data_len += size; - skb->truesize += size; + skb->truesize += PAGE_SIZE; } else skb_put(skb, length); @@ -156,14 +156,18 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id) struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; int buf_size; + int tailroom; u64 *mapping; - if (ipoib_ud_need_sg(priv->max_ib_mtu)) + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { buf_size = IPOIB_UD_HEAD_SIZE; - else + tailroom = 128; /* reserve some tailroom for IP/TCP headers */ + } else { buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); + tailroom = 0; + } - skb = dev_alloc_skb(buf_size + 4); + skb = dev_alloc_skb(buf_size + tailroom + 4); if (unlikely(!skb)) return NULL; -- cgit v1.2.3 From c1f5163de417dab01fa9daaf09a74bbb19303f3c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 10 Jul 2012 10:04:40 +0000 Subject: bnx2: Fix bug in bnx2_free_tx_skbs(). In rare cases, bnx2x_free_tx_skbs() can unmap the wrong DMA address when it gets to the last entry of the tx ring. We were not using the proper macro to skip the last entry when advancing the tx index. Reported-by: Zongyun Lai Reviewed-by: Jeffrey Huang Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index ac7b74488531..1fa4927a45b1 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -5372,7 +5372,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp) int k, last; if (skb == NULL) { - j++; + j = NEXT_TX_BD(j); continue; } @@ -5384,8 +5384,8 @@ bnx2_free_tx_skbs(struct bnx2 *bp) tx_buf->skb = NULL; last = tx_buf->nr_frags; - j++; - for (k = 0; k < last; k++, j++) { + j = NEXT_TX_BD(j); + for (k = 0; k < last; k++, j = NEXT_TX_BD(j)) { tx_buf = &txr->tx_buf_ring[TX_RING_IDX(j)]; dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(tx_buf, mapping), -- cgit v1.2.3 From 93574fcc5b50cc7b8834698acb2ce947e5b6a5dc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Jul 2012 09:35:08 +0300 Subject: tracing: Check for allocation failure in __tracing_open() Clean up and return -ENOMEM on if the kzalloc() fails. This also prevents a potential crash, as the pointer that failed to allocate would be later used. Link: http://lkml.kernel.org/r/20120711063507.GF11812@elgon.mountain Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 814ff306ae74..a120f98c4112 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2390,6 +2390,9 @@ __tracing_open(struct inode *inode, struct file *file) iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(), GFP_KERNEL); + if (!iter->buffer_iter) + goto release; + /* * We make a copy of the current tracer to avoid concurrent * changes on it while we are reading. @@ -2451,6 +2454,7 @@ __tracing_open(struct inode *inode, struct file *file) mutex_unlock(&trace_types_lock); kfree(iter->trace); kfree(iter->buffer_iter); +release: seq_release_private(inode, file); return ERR_PTR(-ENOMEM); } -- cgit v1.2.3 From 7ac2908e4b2edaec60e9090ddb4d9ceb76c05e7d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 12 Jul 2012 03:39:11 +0000 Subject: sch_sfb: Fix missing NULL check Resolves-bug: https://bugzilla.kernel.org/show_bug.cgi?id=44461 Signed-off-by: Alan Cox Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 74305c883bd3..30ea4674cabd 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -570,6 +570,8 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) sch->qstats.backlog = q->qdisc->qstats.backlog; opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; return nla_nest_end(skb, opts); -- cgit v1.2.3 From e3a746f5aab71f2dd0a83116772922fb37ae29d6 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 12 Jul 2012 07:40:42 +1000 Subject: xfs: really fix the cursor leak in xfs_alloc_ag_vextent_near The current cursor is reallocated when retrying the allocation, so the existing cursor needs to be destroyed in both the restart and the failure cases. Signed-off-by: Dave Chinner Tested-by: Mike Snitzer Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 9d1aeb7e2734..f654f51b0c67 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1074,13 +1074,13 @@ restart: * If we couldn't get anything, give up. */ if (bno_cur_lt == NULL && bno_cur_gt == NULL) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + if (!forced++) { trace_xfs_alloc_near_busy(args); xfs_log_force(args->mp, XFS_LOG_SYNC); goto restart; } - - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; return 0; -- cgit v1.2.3 From aa292847b9fc6e187547110de833a7d3131bbddf Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 12 Jul 2012 07:40:43 +1000 Subject: xfs: don't defer metadata allocation to the workqueue Almost all metadata allocations come from shallow stack usage situations. Avoid the overhead of switching the allocation to a workqueue as we are not in danger of running out of stack when making these allocations. Metadata allocations are already marked through the args that are passed down, so this is trivial to do. Signed-off-by: Dave Chinner Reported-by: Mel Gorman Tested-by: Mel Gorman Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index f654f51b0c67..4f33c32affe3 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2434,13 +2434,22 @@ xfs_alloc_vextent_worker( current_restore_flags_nested(&pflags, PF_FSTRANS); } - -int /* error */ +/* + * Data allocation requests often come in with little stack to work on. Push + * them off to a worker thread so there is lots of stack to use. Metadata + * requests, OTOH, are generally from low stack usage paths, so avoid the + * context switch overhead here. + */ +int xfs_alloc_vextent( - xfs_alloc_arg_t *args) /* allocation argument structure */ + struct xfs_alloc_arg *args) { DECLARE_COMPLETION_ONSTACK(done); + if (!args->userdata) + return __xfs_alloc_vextent(args); + + args->done = &done; INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker); queue_work(xfs_alloc_wq, &args->work); -- cgit v1.2.3 From 40a9b7963df32e743c45d79a5f41445fe2476f15 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Jul 2012 06:00:04 -0400 Subject: xfs: prevent recursion in xfs_buf_iorequest If the b_iodone handler is run in calling context in xfs_buf_iorequest we can run into a recursion where xfs_buf_iodone_callbacks keeps calling back into xfs_buf_iorequest because an I/O error happened, which keeps calling back into xfs_buf_iorequest. This chain will usually not take long because the filesystem gets shut down because of log I/O errors, but even over a short time it can cause stack overflows if run on the same context. As a short term workaround make sure we always call the iodone handler in workqueue context. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index a4beb421018a..687b275f165c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1243,7 +1243,7 @@ xfs_buf_iorequest( */ atomic_set(&bp->b_io_remaining, 1); _xfs_buf_ioapply(bp); - _xfs_buf_ioend(bp, 0); + _xfs_buf_ioend(bp, 1); xfs_buf_rele(bp); } -- cgit v1.2.3 From 1632dcc93f55f9ab407b373da1957a727b1a7fe3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Jul 2012 02:24:10 -0400 Subject: xfs: do not call xfs_bdstrat_cb in xfs_buf_iodone_callbacks xfs_bdstrat_cb only adds a check for a shutdown filesystem over xfs_buf_iorequest, but xfs_buf_iodone_callbacks just checked for a shut down filesystem a little earlier. In addition the shutdown handling in xfs_bdstrat_cb is not very suitable for this caller. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 51 ++++++++++++++++++++++----------------------------- fs/xfs/xfs_buf.h | 1 - fs/xfs/xfs_buf_item.c | 2 +- 3 files changed, 23 insertions(+), 31 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 687b275f165c..269b35c084da 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -989,27 +989,6 @@ xfs_buf_ioerror_alert( (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); } -int -xfs_bwrite( - struct xfs_buf *bp) -{ - int error; - - ASSERT(xfs_buf_islocked(bp)); - - bp->b_flags |= XBF_WRITE; - bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); - - xfs_bdstrat_cb(bp); - - error = xfs_buf_iowait(bp); - if (error) { - xfs_force_shutdown(bp->b_target->bt_mount, - SHUTDOWN_META_IO_ERROR); - } - return error; -} - /* * Called when we want to stop a buffer from getting written or read. * We attach the EIO error, muck with its flags, and call xfs_buf_ioend @@ -1079,14 +1058,7 @@ xfs_bioerror_relse( return EIO; } - -/* - * All xfs metadata buffers except log state machine buffers - * get this attached as their b_bdstrat callback function. - * This is so that we can catch a buffer - * after prematurely unpinning it to forcibly shutdown the filesystem. - */ -int +STATIC int xfs_bdstrat_cb( struct xfs_buf *bp) { @@ -1107,6 +1079,27 @@ xfs_bdstrat_cb( return 0; } +int +xfs_bwrite( + struct xfs_buf *bp) +{ + int error; + + ASSERT(xfs_buf_islocked(bp)); + + bp->b_flags |= XBF_WRITE; + bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); + + xfs_bdstrat_cb(bp); + + error = xfs_buf_iowait(bp); + if (error) { + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_META_IO_ERROR); + } + return error; +} + /* * Wrapper around bdstrat so that we can stop data from going to disk in case * we are shutting down the filesystem. Typically user data goes thru this diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7f1d1392ce37..79344c48008e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -180,7 +180,6 @@ extern void xfs_buf_unlock(xfs_buf_t *); extern int xfs_bwrite(struct xfs_buf *bp); extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -extern int xfs_bdstrat_cb(struct xfs_buf *); extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 45df2b857d48..d9e451115f98 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -954,7 +954,7 @@ xfs_buf_iodone_callbacks( if (!XFS_BUF_ISSTALE(bp)) { bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; - xfs_bdstrat_cb(bp); + xfs_buf_iorequest(bp); } else { xfs_buf_relse(bp); } -- cgit v1.2.3 From d0efa8f23a644f7cb7d1f8e78dd9a223efa412a3 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Thu, 12 Jul 2012 08:56:56 +0000 Subject: e1000e: Correct link check logic for 82571 serdes SYNCH bit and IV bit of RXCW register are sticky. Before examining these bits, RXCW should be read twice to filter out one-time false events and have correct values for these bits. Incorrect values of these bits in link check logic can cause weird link stability issues if auto-negotiation fails. CC: stable [2.6.38+] Reported-by: Dean Nelson Signed-off-by: Tushar Dave Reviewed-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/82571.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 36db4df09aed..1f063dcd8f85 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -1572,6 +1572,9 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) ctrl = er32(CTRL); status = er32(STATUS); rxcw = er32(RXCW); + /* SYNCH bit and IV bit are sticky */ + udelay(10); + rxcw = er32(RXCW); if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) { -- cgit v1.2.3 From a52359b56c29f55aaadf1dab80a0e1043b982676 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Sat, 14 Jul 2012 04:23:58 +0000 Subject: e1000e: fix test for PHY being accessible on 82577/8/9 and I217 Occasionally, the PHY can be initially inaccessible when the first read of a PHY register, e.g. PHY_ID1, happens (signified by the returned value 0xFFFF) but subsequent accesses of the PHY work as expected. Add a retry counter similar to how it is done in the generic e1000_get_phy_id(). Also, when the PHY is completely inaccessible (i.e. when subsequent reads of the PHY_IDx registers returns all F's) and the MDIO access mode must be set to slow before attempting to read the PHY ID again, the functions that do these latter two actions expect the SW/FW/HW semaphore is not already set so the semaphore must be released before and re-acquired after calling them otherwise there is an unnecessarily inordinate amount of delay during device initialization. Reported-by: Heikki Krogerus Signed-off-by: Bruce Allan Tested-by: Heikki Krogerus Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 42 ++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 238ab2f8a5e7..e3a7b07df629 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -325,24 +325,46 @@ static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val) **/ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) { - u16 phy_reg; - u32 phy_id; + u16 phy_reg = 0; + u32 phy_id = 0; + s32 ret_val; + u16 retry_count; + + for (retry_count = 0; retry_count < 2; retry_count++) { + ret_val = e1e_rphy_locked(hw, PHY_ID1, &phy_reg); + if (ret_val || (phy_reg == 0xFFFF)) + continue; + phy_id = (u32)(phy_reg << 16); - e1e_rphy_locked(hw, PHY_ID1, &phy_reg); - phy_id = (u32)(phy_reg << 16); - e1e_rphy_locked(hw, PHY_ID2, &phy_reg); - phy_id |= (u32)(phy_reg & PHY_REVISION_MASK); + ret_val = e1e_rphy_locked(hw, PHY_ID2, &phy_reg); + if (ret_val || (phy_reg == 0xFFFF)) { + phy_id = 0; + continue; + } + phy_id |= (u32)(phy_reg & PHY_REVISION_MASK); + break; + } if (hw->phy.id) { if (hw->phy.id == phy_id) return true; - } else { - if ((phy_id != 0) && (phy_id != PHY_REVISION_MASK)) - hw->phy.id = phy_id; + } else if (phy_id) { + hw->phy.id = phy_id; + hw->phy.revision = (u32)(phy_reg & ~PHY_REVISION_MASK); return true; } - return false; + /* + * In case the PHY needs to be in mdio slow mode, + * set slow mode and try to get the PHY id again. + */ + hw->phy.ops.release(hw); + ret_val = e1000_set_mdio_slow_mode_hv(hw); + if (!ret_val) + ret_val = e1000e_get_phy_id(hw); + hw->phy.ops.acquire(hw); + + return !ret_val; } /** -- cgit v1.2.3 From a6e7360b22f2360d7640f99051415d49cebeb908 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Thu, 21 Jun 2012 18:10:35 +0800 Subject: pinctrl: pinctrl-imx: only print debug message when DEBUG is defined Fix regression for commit 3a86a5f8 (pinctrl: pinctrl-imx: free allocated pinctrl_map structure only once and use kernel facilities for IMX_PMX_DUMP) introduced in 3.5-rc3. With above commit, the debug code will alway be excuted. Change to excute it only when DEBUG is defined. Signed-off-by: Dong Aisheng Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-imx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c index dd6d93aa5334..90c837f469a6 100644 --- a/drivers/pinctrl/pinctrl-imx.c +++ b/drivers/pinctrl/pinctrl-imx.c @@ -474,7 +474,9 @@ static int __devinit imx_pinctrl_parse_groups(struct device_node *np, grp->configs[j] = config & ~IMX_PAD_SION; } +#ifdef DEBUG IMX_PMX_DUMP(info, grp->pins, grp->mux_mode, grp->configs, grp->npins); +#endif return 0; } -- cgit v1.2.3 From 4a5f7eff8b0b34354e5c63272835e5e2dfe1c933 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 6 Jul 2012 17:09:23 +0800 Subject: pinctrl: pinctrl-imx6q: add missed mux function for USBOTG_ID The original pin registers table is derived from u-boot mainline, but somehow it was found missing some mux functions for USBOTG_ID. We added it at the bottom by following the exist pin function ids, then it will not break the exist using of pin function id in dts file. Reported-by: Richard Zhao Signed-off-by: Dong Aisheng --- Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt | 2 ++ drivers/pinctrl/pinctrl-imx6q.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt index 82b43f915857..a4119f6422d9 100644 --- a/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt @@ -1626,3 +1626,5 @@ MX6Q_PAD_SD2_DAT3__PCIE_CTRL_MUX_11 1587 MX6Q_PAD_SD2_DAT3__GPIO_1_12 1588 MX6Q_PAD_SD2_DAT3__SJC_DONE 1589 MX6Q_PAD_SD2_DAT3__ANATOP_TESTO_3 1590 +MX6Q_PAD_ENET_RX_ER__ANATOP_USBOTG_ID 1591 +MX6Q_PAD_GPIO_1__ANATOP_USBOTG_ID 1592 diff --git a/drivers/pinctrl/pinctrl-imx6q.c b/drivers/pinctrl/pinctrl-imx6q.c index 7737d4d71a3c..e9bf71fbedca 100644 --- a/drivers/pinctrl/pinctrl-imx6q.c +++ b/drivers/pinctrl/pinctrl-imx6q.c @@ -1950,6 +1950,8 @@ static struct imx_pin_reg imx6q_pin_regs[] = { IMX_PIN_REG(MX6Q_PAD_SD2_DAT3, 0x0744, 0x035C, 5, 0x0000, 0), /* MX6Q_PAD_SD2_DAT3__GPIO_1_12 */ IMX_PIN_REG(MX6Q_PAD_SD2_DAT3, 0x0744, 0x035C, 6, 0x0000, 0), /* MX6Q_PAD_SD2_DAT3__SJC_DONE */ IMX_PIN_REG(MX6Q_PAD_SD2_DAT3, 0x0744, 0x035C, 7, 0x0000, 0), /* MX6Q_PAD_SD2_DAT3__ANATOP_TESTO_3 */ + IMX_PIN_REG(MX6Q_PAD_ENET_RX_ER, 0x04EC, 0x01D8, 0, 0x0000, 0), /* MX6Q_PAD_ENET_RX_ER__ANATOP_USBOTG_ID */ + IMX_PIN_REG(MX6Q_PAD_GPIO_1, 0x05F4, 0x0224, 3, 0x0000, 0), /* MX6Q_PAD_GPIO_1__ANATOP_USBOTG_ID */ }; /* Pad names for the pinmux subsystem */ -- cgit v1.2.3 From 3cc5d2a6b9a2fd1bf024aa5e52dd22961eecaf13 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 13 Jul 2012 18:18:04 -0700 Subject: tcm_fc: Fix crash seen with aborts and large reads This patch fixes a crash seen when large reads have their exchange aborted by either timing out or being reset. Because the exchange abort results in the seq pointer being set to NULL, because the sequence is no longer valid, it must not be dereferenced. This patch changes the function ft_get_task_tag to return ~0 if it is unable to get the tag for this reason. Because the get_task_tag interface provides no means of returning an error, this seems like the best way to fix this issue at the moment. Signed-off-by: Mark Rustad Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/tcm_fc/tfc_cmd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index f03fb9730f5b..5b65f33939a8 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -230,6 +230,8 @@ u32 ft_get_task_tag(struct se_cmd *se_cmd) { struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd); + if (cmd->aborted) + return ~0; return fc_seq_exch(cmd->seq)->rxid; } -- cgit v1.2.3 From 64919e60915c5151b3dd4c8d2d9237a115ca990c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 6 Jul 2012 14:13:29 -0400 Subject: SELinux: include definition of new capabilities The kernel has added CAP_WAKE_ALARM and CAP_EPOLLWAKEUP. We need to define these in SELinux so they can be mediated by policy. Signed-off-by: Eric Paris Signed-off-by: James Morris --- security/selinux/include/classmap.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index b8c53723e09b..0b04fd9e9e3e 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -145,7 +145,9 @@ struct security_class_mapping secclass_map[] = { "node_bind", "name_connect", NULL } }, { "memprotect", { "mmap_zero", NULL } }, { "peer", { "recv", NULL } }, - { "capability2", { "mac_override", "mac_admin", "syslog", NULL } }, + { "capability2", + { "mac_override", "mac_admin", "syslog", "wake_alarm", "epollwakeup", + NULL } }, { "kernel_service", { "use_as_override", "create_files_as", NULL } }, { "tun_socket", { COMMON_SOCK_PERMS, NULL } }, -- cgit v1.2.3 From 3d2195c3324b27e65ba53d9626a6bd91a2515797 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 6 Jul 2012 14:13:30 -0400 Subject: SELinux: do not check open perms if they are not known to policy When I introduced open perms policy didn't understand them and I implemented them as a policycap. When I added the checking of open perm to truncate I forgot to conditionalize it on the userspace defined policy capability. Running an old policy with a new kernel will not check open on open(2) but will check it on truncate. Conditionalize the truncate check the same as the open check. Signed-off-by: Eric Paris Cc: stable@vger.kernel.org # 3.4.x Signed-off-by: James Morris --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 372ec6502aa8..ffd8900a38e8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2717,7 +2717,7 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET)) return dentry_has_perm(cred, dentry, FILE__SETATTR); - if (ia_valid & ATTR_SIZE) + if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE)) av |= FILE__OPEN; return dentry_has_perm(cred, dentry, av); -- cgit v1.2.3 From 46c87852e99cf8ce97e207b11cde19085837e39c Mon Sep 17 00:00:00 2001 From: Prathyush K Date: Mon, 16 Jul 2012 08:59:55 +0200 Subject: ARM: dma-mapping: modify condition check while freeing pages WARNING: at mm/vmalloc.c:1471 __iommu_free_buffer+0xcc/0xd0() Trying to vfree() nonexistent vm area (ef095000) Modules linked in: [] (unwind_backtrace+0x0/0xfc) from [] (warn_slowpath_common+0x54/0x64) [] (warn_slowpath_common+0x54/0x64) from [] (warn_slowpath_fmt+0x30/0x40) [] (warn_slowpath_fmt+0x30/0x40) from [] (__iommu_free_buffer+0xcc/0xd0) [] (__iommu_free_buffer+0xcc/0xd0) from [] (exynos_drm_free_buf+0xe4/0x138) [] (exynos_drm_free_buf+0xe4/0x138) from [] (exynos_drm_gem_destroy+0x80/0xfc) [] (exynos_drm_gem_destroy+0x80/0xfc) from [] (drm_gem_object_free+0x28/0x34) [] (drm_gem_object_free+0x28/0x34) from [] (drm_gem_object_release_handle+0xcc/0xd8) [] (drm_gem_object_release_handle+0xcc/0xd8) from [] (idr_for_each+0x74/0xb8) [] (idr_for_each+0x74/0xb8) from [] (drm_gem_release+0x1c/0x30) [] (drm_gem_release+0x1c/0x30) from [] (drm_release+0x608/0x694) [] (drm_release+0x608/0x694) from [] (fput+0xb8/0x228) [] (fput+0xb8/0x228) from [] (filp_close+0x64/0x84) [] (filp_close+0x64/0x84) from [] (put_files_struct+0xe8/0x104) [] (put_files_struct+0xe8/0x104) from [] (do_exit+0x608/0x774) [] (do_exit+0x608/0x774) from [] (do_group_exit+0x48/0xb4) [] (do_group_exit+0x48/0xb4) from [] (sys_exit_group+0x10/0x18) [] (sys_exit_group+0x10/0x18) from [] (ret_fast_syscall+0x0/0x30) This patch modifies the condition while freeing to match the condition used while allocation. This fixes the above warning which arises when array size is equal to PAGE_SIZE where allocation is done using kzalloc but free is done using vfree. Signed-off-by: Prathyush K Signed-off-by: Marek Szyprowski --- arch/arm/mm/dma-mapping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4044abcf6f9d..655878bcc96d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1091,7 +1091,7 @@ error: while (--i) if (pages[i]) __free_pages(pages[i], 0); - if (array_size < PAGE_SIZE) + if (array_size <= PAGE_SIZE) kfree(pages); else vfree(pages); @@ -1106,7 +1106,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s for (i = 0; i < count; i++) if (pages[i]) __free_pages(pages[i], 0); - if (array_size < PAGE_SIZE) + if (array_size <= PAGE_SIZE) kfree(pages); else vfree(pages); -- cgit v1.2.3 From 310e158cc3b7a6adf41e778d52be746c4dc88561 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Jul 2012 13:15:52 +0200 Subject: net: respect GFP_DMA in __netdev_alloc_skb() Few drivers use GFP_DMA allocations, and netdev_alloc_frag() doesn't allocate pages in DMA zone. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 46a3d23d259e..d124306b81fd 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -353,7 +353,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - if (fragsz <= PAGE_SIZE && !(gfp_mask & __GFP_WAIT)) { + if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { void *data = netdev_alloc_frag(fragsz); if (likely(data)) { -- cgit v1.2.3 From 05d290d66be6ef77a0b962ebecf01911bd984a78 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Sun, 15 Jul 2012 17:14:25 -0400 Subject: fifo: Do not restart open() if it already found a partner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a parent and child process open the two ends of a fifo, and the child immediately exits, the parent may receive a SIGCHLD before its open() returns. In that case, we need to make sure that open() will return successfully after the SIGCHLD handler returns, instead of throwing EINTR or being restarted. Otherwise, the restarted open() would incorrectly wait for a second partner on the other end. The following test demonstrates the EINTR that was wrongly thrown from the parent’s open(). Change .sa_flags = 0 to .sa_flags = SA_RESTART to see a deadlock instead, in which the restarted open() waits for a second reader that will never come. (On my systems, this happens pretty reliably within about 5 to 500 iterations. Others report that it manages to loop ~forever sometimes; YMMV.) #include #include #include #include #include #include #include #include #define CHECK(x) do if ((x) == -1) {perror(#x); abort();} while(0) void handler(int signum) {} int main() { struct sigaction act = {.sa_handler = handler, .sa_flags = 0}; CHECK(sigaction(SIGCHLD, &act, NULL)); CHECK(mknod("fifo", S_IFIFO | S_IRWXU, 0)); for (;;) { int fd; pid_t pid; putc('.', stderr); CHECK(pid = fork()); if (pid == 0) { CHECK(fd = open("fifo", O_RDONLY)); _exit(0); } CHECK(fd = open("fifo", O_WRONLY)); CHECK(close(fd)); CHECK(waitpid(pid, NULL, 0)); } } This is what I suspect was causing the Git test suite to fail in t9010-svn-fe.sh: http://bugs.debian.org/678852 Signed-off-by: Anders Kaseorg Reviewed-by: Jonathan Nieder Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/fifo.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/fifo.c b/fs/fifo.c index b1a524d798e7..cf6f4345ceb0 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -14,7 +14,7 @@ #include #include -static void wait_for_partner(struct inode* inode, unsigned int *cnt) +static int wait_for_partner(struct inode* inode, unsigned int *cnt) { int cur = *cnt; @@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt) if (signal_pending(current)) break; } + return cur == *cnt ? -ERESTARTSYS : 0; } static void wake_up_partner(struct inode* inode) @@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp) * seen a writer */ filp->f_version = pipe->w_counter; } else { - wait_for_partner(inode, &pipe->w_counter); - if(signal_pending(current)) + if (wait_for_partner(inode, &pipe->w_counter)) goto err_rd; } } @@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp) wake_up_partner(inode); if (!pipe->readers) { - wait_for_partner(inode, &pipe->r_counter); - if (signal_pending(current)) + if (wait_for_partner(inode, &pipe->r_counter)) goto err_wr; } break; -- cgit v1.2.3 From f507598b06ab00fb46495ccdeeb3ef9c1dc43dee Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 16 Jul 2012 17:51:50 +0100 Subject: gma500: Fix lid related crash We now set up the lid timer before we set up the backlight. On some devices that causes a crash as we do a backlight change before or during the setup. As this fixes a crash on boot regression on some setups it ought to go in ASAP, especially as all the user gets is a blank screen. Signed-off-by: Alan Cox Tested-by: Mattia Dongili Signed-off-by: Linus Torvalds --- drivers/gpu/drm/gma500/psb_device.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c index eff039bf92d4..5971bc82b765 100644 --- a/drivers/gpu/drm/gma500/psb_device.c +++ b/drivers/gpu/drm/gma500/psb_device.c @@ -144,6 +144,10 @@ static int psb_backlight_init(struct drm_device *dev) psb_backlight_device->props.max_brightness = 100; backlight_update_status(psb_backlight_device); dev_priv->backlight_device = psb_backlight_device; + + /* This must occur after the backlight is properly initialised */ + psb_lid_timer_init(dev_priv); + return 0; } @@ -354,13 +358,6 @@ static int psb_chip_setup(struct drm_device *dev) return 0; } -/* Not exactly an erratum more an irritation */ -static void psb_chip_errata(struct drm_device *dev) -{ - struct drm_psb_private *dev_priv = dev->dev_private; - psb_lid_timer_init(dev_priv); -} - static void psb_chip_teardown(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; @@ -379,7 +376,6 @@ const struct psb_ops psb_chip_ops = { .sgx_offset = PSB_SGX_OFFSET, .chip_setup = psb_chip_setup, .chip_teardown = psb_chip_teardown, - .errata = psb_chip_errata, .crtc_helper = &psb_intel_helper_funcs, .crtc_funcs = &psb_intel_crtc_funcs, -- cgit v1.2.3 From 166973e506231c496b4427760bcb5a9860422850 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 16 Jul 2012 17:52:45 +0100 Subject: gma500: move the ASLE enable Otherwise we end up getting the masks wrong, can get events before we are doing power control and other ungood things. Again this is a regression fix where the ordering of handling was disturbed by other work, and the user experience on some boxes is a blank screen. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/gpu/drm/gma500/opregion.c | 8 +++----- drivers/gpu/drm/gma500/opregion.h | 5 +++++ drivers/gpu/drm/gma500/psb_drv.c | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/gma500/opregion.c b/drivers/gpu/drm/gma500/opregion.c index 4f186eca3a30..c430bd424681 100644 --- a/drivers/gpu/drm/gma500/opregion.c +++ b/drivers/gpu/drm/gma500/opregion.c @@ -144,6 +144,8 @@ struct opregion_asle { #define ASLE_CBLV_VALID (1<<31) +static struct psb_intel_opregion *system_opregion; + static u32 asle_set_backlight(struct drm_device *dev, u32 bclp) { struct drm_psb_private *dev_priv = dev->dev_private; @@ -205,7 +207,7 @@ void psb_intel_opregion_enable_asle(struct drm_device *dev) struct drm_psb_private *dev_priv = dev->dev_private; struct opregion_asle *asle = dev_priv->opregion.asle; - if (asle) { + if (asle && system_opregion ) { /* Don't do this on Medfield or other non PC like devices, they use the bit for something different altogether */ psb_enable_pipestat(dev_priv, 0, PIPE_LEGACY_BLC_EVENT_ENABLE); @@ -221,7 +223,6 @@ void psb_intel_opregion_enable_asle(struct drm_device *dev) #define ACPI_EV_LID (1<<1) #define ACPI_EV_DOCK (1<<2) -static struct psb_intel_opregion *system_opregion; static int psb_intel_opregion_video_event(struct notifier_block *nb, unsigned long val, void *data) @@ -266,9 +267,6 @@ void psb_intel_opregion_init(struct drm_device *dev) system_opregion = opregion; register_acpi_notifier(&psb_intel_opregion_notifier); } - - if (opregion->asle) - psb_intel_opregion_enable_asle(dev); } void psb_intel_opregion_fini(struct drm_device *dev) diff --git a/drivers/gpu/drm/gma500/opregion.h b/drivers/gpu/drm/gma500/opregion.h index 72dc6b921265..4a90f8b0e16c 100644 --- a/drivers/gpu/drm/gma500/opregion.h +++ b/drivers/gpu/drm/gma500/opregion.h @@ -27,6 +27,7 @@ extern void psb_intel_opregion_asle_intr(struct drm_device *dev); extern void psb_intel_opregion_init(struct drm_device *dev); extern void psb_intel_opregion_fini(struct drm_device *dev); extern int psb_intel_opregion_setup(struct drm_device *dev); +extern void psb_intel_opregion_enable_asle(struct drm_device *dev); #else @@ -46,4 +47,8 @@ extern inline int psb_intel_opregion_setup(struct drm_device *dev) { return 0; } + +extern inline void psb_intel_opregion_enable_asle(struct drm_device *dev) +{ +} #endif diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index caba6e08693c..a8858a907f47 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -374,6 +374,7 @@ static int psb_driver_load(struct drm_device *dev, unsigned long chipset) if (ret) return ret; + psb_intel_opregion_enable_asle(dev); #if 0 /*enable runtime pm at last*/ pm_runtime_enable(&dev->pdev->dev); -- cgit v1.2.3 From 64691959401601a1514c39cf0131533a4be53c12 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 16 Jul 2012 17:54:03 +0100 Subject: gma500,cdv: Fix the brightness base Some desktop environments carefully save and restore the brightness settings from the previous boot. Unfortunately they don't all check to see if the range has changed. The end result is that they restore a brightness of 100/lots not 100/100. As the old driver and the non-free GMA36xx driver both use 0-100 we thus need to go back doing the same thing to avoid users getting a mysterious black screen after boot. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/gpu/drm/gma500/cdv_device.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c index 9764045428ce..b7e7b49d8f62 100644 --- a/drivers/gpu/drm/gma500/cdv_device.c +++ b/drivers/gpu/drm/gma500/cdv_device.c @@ -78,21 +78,6 @@ static int cdv_backlight_combination_mode(struct drm_device *dev) return REG_READ(BLC_PWM_CTL2) & PWM_LEGACY_MODE; } -static int cdv_get_brightness(struct backlight_device *bd) -{ - struct drm_device *dev = bl_get_data(bd); - u32 val = REG_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK; - - if (cdv_backlight_combination_mode(dev)) { - u8 lbpc; - - val &= ~1; - pci_read_config_byte(dev->pdev, 0xF4, &lbpc); - val *= lbpc; - } - return val; -} - static u32 cdv_get_max_backlight(struct drm_device *dev) { u32 max = REG_READ(BLC_PWM_CTL); @@ -110,6 +95,22 @@ static u32 cdv_get_max_backlight(struct drm_device *dev) return max; } +static int cdv_get_brightness(struct backlight_device *bd) +{ + struct drm_device *dev = bl_get_data(bd); + u32 val = REG_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK; + + if (cdv_backlight_combination_mode(dev)) { + u8 lbpc; + + val &= ~1; + pci_read_config_byte(dev->pdev, 0xF4, &lbpc); + val *= lbpc; + } + return (val * 100)/cdv_get_max_backlight(dev); + +} + static int cdv_set_brightness(struct backlight_device *bd) { struct drm_device *dev = bl_get_data(bd); @@ -120,6 +121,9 @@ static int cdv_set_brightness(struct backlight_device *bd) if (level < 1) level = 1; + level *= cdv_get_max_backlight(dev); + level /= 100; + if (cdv_backlight_combination_mode(dev)) { u32 max = cdv_get_max_backlight(dev); u8 lbpc; @@ -157,7 +161,6 @@ static int cdv_backlight_init(struct drm_device *dev) cdv_backlight_device->props.brightness = cdv_get_brightness(cdv_backlight_device); - cdv_backlight_device->props.max_brightness = cdv_get_max_backlight(dev); backlight_update_status(cdv_backlight_device); dev_priv->backlight_device = cdv_backlight_device; return 0; -- cgit v1.2.3 From 3e997130bd2e8c6f5aaa49d6e3161d4d29b43ab0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Jul 2012 12:50:42 -0400 Subject: timekeeping: Add missing update call in timekeeping_resume() The leap second rework unearthed another issue of inconsistent data. On timekeeping_resume() the timekeeper data is updated, but nothing calls timekeeping_update(), so now the update code in the timer interrupt sees stale values. This has been the case before those changes, but then the timer interrupt was using stale data as well so this went unnoticed for quite some time. Add the missing update call, so all the data is consistent everywhere. Reported-by: Andreas Schwab Reported-and-tested-by: "Rafael J. Wysocki" Reported-and-tested-by: Martin Steigerwald Cc: LKML Cc: Linux PM list Cc: John Stultz Cc: Ingo Molnar Cc: Peter Zijlstra , Cc: Prarit Bhargava Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz Signed-off-by: Linus Torvalds --- kernel/time/timekeeping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 269b1fe5f2ae..3447cfaf11e7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -717,6 +717,7 @@ static void timekeeping_resume(void) timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; timekeeping_suspended = 0; + timekeeping_update(false); write_sequnlock_irqrestore(&timekeeper.lock, flags); touch_softlockup_watchdog(); -- cgit v1.2.3 From d35212f3ca3bf4fb49d15e37f530c9931e2d2183 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 16 Jul 2012 15:17:10 -0700 Subject: target: Clean up returning errors in PR handling code - instead of (PTR_ERR(file) < 0) just use IS_ERR(file) - return -EINVAL instead of EINVAL - all other error returns in target_scsi3_emulate_pr_out() use "goto out" -- get rid of the one remaining straight "return." Signed-off-by: Roland Dreier Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_pr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 85564998500a..a1bcd927a9e6 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -2031,7 +2031,7 @@ static int __core_scsi3_write_aptpl_to_file( if (IS_ERR(file) || !file || !file->f_dentry) { pr_err("filp_open(%s) for APTPL metadata" " failed\n", path); - return (PTR_ERR(file) < 0 ? PTR_ERR(file) : -ENOENT); + return IS_ERR(file) ? PTR_ERR(file) : -ENOENT; } iov[0].iov_base = &buf[0]; @@ -3818,7 +3818,7 @@ int target_scsi3_emulate_pr_out(struct se_cmd *cmd) " SPC-2 reservation is held, returning" " RESERVATION_CONFLICT\n"); cmd->scsi_sense_reason = TCM_RESERVATION_CONFLICT; - ret = EINVAL; + ret = -EINVAL; goto out; } @@ -3828,7 +3828,8 @@ int target_scsi3_emulate_pr_out(struct se_cmd *cmd) */ if (!cmd->se_sess) { cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - return -EINVAL; + ret = -EINVAL; + goto out; } if (cmd->data_length < 24) { -- cgit v1.2.3 From 1765fe5edcb83f53fc67edeb559fcf4bc82c6460 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 16 Jul 2012 17:10:17 -0700 Subject: target: Fix range calculation in WRITE SAME emulation when num blocks == 0 When NUMBER OF LOGICAL BLOCKS is 0, WRITE SAME is supposed to write all the blocks from the specified LBA through the end of the device. However, dev->transport->get_blocks(dev) (perhaps confusingly) returns the last valid LBA rather than the number of blocks, so the correct number of blocks to write starting with lba is dev->transport->get_blocks(dev) - lba + 1 (nab: Backport roland's for-3.6 patch to for-3.5) Signed-off-by: Roland Dreier Cc: Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_cdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index 9888693a18fe..664f6e775d0e 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -1095,7 +1095,7 @@ int target_emulate_write_same(struct se_cmd *cmd) if (num_blocks != 0) range = num_blocks; else - range = (dev->transport->get_blocks(dev) - lba); + range = (dev->transport->get_blocks(dev) - lba) + 1; pr_debug("WRITE_SAME UNMAP: LBA: %llu Range: %llu\n", (unsigned long long)lba, (unsigned long long)range); -- cgit v1.2.3 From ffc61ccbb96809df8d97ed609ac86b509eaf9056 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Wed, 11 Jul 2012 12:28:05 +0100 Subject: Initialise mid_q_entry before putting it on the pending queue A user reported a crash in cifs_demultiplex_thread() caused by an incorrectly set mid_q_entry->callback() function. It appears that the callback assignment made in cifs_call_async() was not flushed back to memory suggesting that a memory barrier was required here. Changing the code to make sure that the mid_q_entry structure was completely initialised before it was added to the pending queue fixes the problem. Signed-off-by: Sachin Prabhu Reviewed-by: Jeff Layton Reviewed-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/transport.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 3097ee58fd7d..f25d4ea14be4 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -365,16 +365,14 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, if (mid == NULL) return -ENOMEM; - /* put it on the pending_mid_q */ - spin_lock(&GlobalMid_Lock); - list_add_tail(&mid->qhead, &server->pending_mid_q); - spin_unlock(&GlobalMid_Lock); - rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number); - if (rc) - delete_mid(mid); + if (rc) { + DeleteMidQEntry(mid); + return rc; + } + *ret_mid = mid; - return rc; + return 0; } /* @@ -407,17 +405,21 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, mid->callback_data = cbdata; mid->mid_state = MID_REQUEST_SUBMITTED; + /* put it on the pending_mid_q */ + spin_lock(&GlobalMid_Lock); + list_add_tail(&mid->qhead, &server->pending_mid_q); + spin_unlock(&GlobalMid_Lock); + + cifs_in_send_inc(server); rc = smb_sendv(server, iov, nvec); cifs_in_send_dec(server); cifs_save_when_sent(mid); mutex_unlock(&server->srv_mutex); - if (rc) - goto out_err; + if (rc == 0) + return 0; - return rc; -out_err: delete_mid(mid); add_credits(server, 1); wake_up(&server->request_q); -- cgit v1.2.3 From 3ae629d98bd5ed77585a878566f04f310adbc591 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 11 Jul 2012 09:09:35 -0400 Subject: cifs: on CONFIG_HIGHMEM machines, limit the rsize/wsize to the kmap space We currently rely on being able to kmap all of the pages in an async read or write request. If you're on a machine that has CONFIG_HIGHMEM set then that kmap space is limited, sometimes to as low as 512 slots. With 512 slots, we can only support up to a 2M r/wsize, and that's assuming that we can get our greedy little hands on all of them. There are other users however, so it's possible we'll end up stuck with a size that large. Since we can't handle a rsize or wsize larger than that currently, cap those options at the number of kmap slots we have. We could consider capping it even lower, but we currently default to a max of 1M. Might as well allow those luddites on 32 bit arches enough rope to hang themselves. A more robust fix would be to teach the send and receive routines how to contend with an array of pages so we don't need to marshal up a kvec array at all. That's a fairly significant overhaul though, so we'll need this limit in place until that's ready. Cc: Reported-by: Jian Li Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0ae86ddf2213..94b7788c3189 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3445,6 +3445,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) +/* + * On hosts with high memory, we can't currently support wsize/rsize that are + * larger than we can kmap at once. Cap the rsize/wsize at + * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request + * larger than that anyway. + */ +#ifdef CONFIG_HIGHMEM +#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE) +#else /* CONFIG_HIGHMEM */ +#define CIFS_KMAP_SIZE_LIMIT (1<<24) +#endif /* CONFIG_HIGHMEM */ + static unsigned int cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) { @@ -3475,6 +3487,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) wsize = min_t(unsigned int, wsize, server->maxBuf - sizeof(WRITE_REQ) + 4); + /* limit to the amount that we can kmap at once */ + wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); + /* hard limit of CIFS_MAX_WSIZE */ wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); @@ -3516,6 +3531,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) if (!(server->capabilities & CAP_LARGE_READ_X)) rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); + /* limit to the amount that we can kmap at once */ + rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); + /* hard limit of CIFS_MAX_RSIZE */ rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); -- cgit v1.2.3 From 3cf003c08be785af4bee9ac05891a15bcbff856a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 11 Jul 2012 09:09:36 -0400 Subject: cifs: when CONFIG_HIGHMEM is set, serialize the read/write kmaps Jian found that when he ran fsx on a 32 bit arch with a large wsize the process and one of the bdi writeback kthreads would sometimes deadlock with a stack trace like this: crash> bt PID: 2789 TASK: f02edaa0 CPU: 3 COMMAND: "fsx" #0 [eed63cbc] schedule at c083c5b3 #1 [eed63d80] kmap_high at c0500ec8 #2 [eed63db0] cifs_async_writev at f7fabcd7 [cifs] #3 [eed63df0] cifs_writepages at f7fb7f5c [cifs] #4 [eed63e50] do_writepages at c04f3e32 #5 [eed63e54] __filemap_fdatawrite_range at c04e152a #6 [eed63ea4] filemap_fdatawrite at c04e1b3e #7 [eed63eb4] cifs_file_aio_write at f7fa111a [cifs] #8 [eed63ecc] do_sync_write at c052d202 #9 [eed63f74] vfs_write at c052d4ee #10 [eed63f94] sys_write at c052df4c #11 [eed63fb0] ia32_sysenter_target at c0409a98 EAX: 00000004 EBX: 00000003 ECX: abd73b73 EDX: 012a65c6 DS: 007b ESI: 012a65c6 ES: 007b EDI: 00000000 SS: 007b ESP: bf8db178 EBP: bf8db1f8 GS: 0033 CS: 0073 EIP: 40000424 ERR: 00000004 EFLAGS: 00000246 Each task would kmap part of its address array before getting stuck, but not enough to actually issue the write. This patch fixes this by serializing the marshal_iov operations for async reads and writes. The idea here is to ensure that cifs aggressively tries to populate a request before attempting to fulfill another one. As soon as all of the pages are kmapped for a request, then we can unlock and allow another one to proceed. There's no need to do this serialization on non-CONFIG_HIGHMEM arches however, so optimize all of this out when CONFIG_HIGHMEM isn't set. Cc: Reported-by: Jian Li Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5b400730c213..4ee522b3f66f 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -86,7 +86,31 @@ static struct { #endif /* CONFIG_CIFS_WEAK_PW_HASH */ #endif /* CIFS_POSIX */ -/* Forward declarations */ +#ifdef CONFIG_HIGHMEM +/* + * On arches that have high memory, kmap address space is limited. By + * serializing the kmap operations on those arches, we ensure that we don't + * end up with a bunch of threads in writeback with partially mapped page + * arrays, stuck waiting for kmap to come back. That situation prevents + * progress and can deadlock. + */ +static DEFINE_MUTEX(cifs_kmap_mutex); + +static inline void +cifs_kmap_lock(void) +{ + mutex_lock(&cifs_kmap_mutex); +} + +static inline void +cifs_kmap_unlock(void) +{ + mutex_unlock(&cifs_kmap_mutex); +} +#else /* !CONFIG_HIGHMEM */ +#define cifs_kmap_lock() do { ; } while(0) +#define cifs_kmap_unlock() do { ; } while(0) +#endif /* CONFIG_HIGHMEM */ /* Mark as invalid, all open files on tree connections since they were closed when session to server was lost */ @@ -1503,7 +1527,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) } /* marshal up the page array */ + cifs_kmap_lock(); len = rdata->marshal_iov(rdata, data_len); + cifs_kmap_unlock(); data_len -= len; /* issue the read if we have any iovecs left to fill */ @@ -2069,7 +2095,9 @@ cifs_async_writev(struct cifs_writedata *wdata) * and set the iov_len properly for each one. It may also set * wdata->bytes too. */ + cifs_kmap_lock(); wdata->marshal_iov(iov, wdata); + cifs_kmap_unlock(); cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); -- cgit v1.2.3 From cd60042cc1392e79410dc8de9e9c1abb38a29e57 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 6 Jul 2012 07:09:42 -0400 Subject: cifs: always update the inode cache with the results from a FIND_* When we get back a FIND_FIRST/NEXT result, we have some info about the dentry that we use to instantiate a new inode. We were ignoring and discarding that info when we had an existing dentry in the cache. Fix this by updating the inode in place when we find an existing dentry and the uniqueid is the same. Cc: # .31.x Reported-and-Tested-by: Andrew Bartlett Reported-by: Bill Robertson Reported-by: Dion Edwards Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/readdir.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 0a8224d1c4c5..a4217f02fab2 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, dentry = d_lookup(parent, name); if (dentry) { - /* FIXME: check for inode number changes? */ - if (dentry->d_inode != NULL) + inode = dentry->d_inode; + /* update inode in place if i_ino didn't change */ + if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { + cifs_fattr_to_inode(inode, fattr); return dentry; + } d_drop(dentry); dput(dentry); } -- cgit v1.2.3 From 2eebc1e188e9e45886ee00662519849339884d6d Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 16 Jul 2012 09:13:51 +0000 Subject: sctp: Fix list corruption resulting from freeing an association on a list A few days ago Dave Jones reported this oops: [22766.294255] general protection fault: 0000 [#1] PREEMPT SMP [22766.295376] CPU 0 [22766.295384] Modules linked in: [22766.387137] ffffffffa169f292 6b6b6b6b6b6b6b6b ffff880147c03a90 ffff880147c03a74 [22766.387135] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 00000000000 [22766.387136] Process trinity-watchdo (pid: 10896, threadinfo ffff88013e7d2000, [22766.387137] Stack: [22766.387140] ffff880147c03a10 [22766.387140] ffffffffa169f2b6 [22766.387140] ffff88013ed95728 [22766.387143] 0000000000000002 [22766.387143] 0000000000000000 [22766.387143] ffff880003fad062 [22766.387144] ffff88013c120000 [22766.387144] [22766.387145] Call Trace: [22766.387145] [22766.387150] [] ? __sctp_lookup_association+0x62/0xd0 [sctp] [22766.387154] [] __sctp_lookup_association+0x86/0xd0 [sctp] [22766.387157] [] sctp_rcv+0x207/0xbb0 [sctp] [22766.387161] [] ? trace_hardirqs_off_caller+0x28/0xd0 [22766.387163] [] ? nf_hook_slow+0x133/0x210 [22766.387166] [] ? ip_local_deliver_finish+0x4c/0x4c0 [22766.387168] [] ip_local_deliver_finish+0x18d/0x4c0 [22766.387169] [] ? ip_local_deliver_finish+0x4c/0x4c0 [22766.387171] [] ip_local_deliver+0x47/0x80 [22766.387172] [] ip_rcv_finish+0x150/0x680 [22766.387174] [] ip_rcv+0x214/0x320 [22766.387176] [] __netif_receive_skb+0x7b7/0x910 [22766.387178] [] ? __netif_receive_skb+0x11c/0x910 [22766.387180] [] ? put_lock_stats.isra.25+0xe/0x40 [22766.387182] [] netif_receive_skb+0x23/0x1f0 [22766.387183] [] ? dev_gro_receive+0x139/0x440 [22766.387185] [] napi_skb_finish+0x70/0xa0 [22766.387187] [] napi_gro_receive+0xf5/0x130 [22766.387218] [] e1000_receive_skb+0x59/0x70 [e1000e] [22766.387242] [] e1000_clean_rx_irq+0x28b/0x460 [e1000e] [22766.387266] [] e1000e_poll+0x78/0x430 [e1000e] [22766.387268] [] net_rx_action+0x1aa/0x3d0 [22766.387270] [] ? account_system_vtime+0x10f/0x130 [22766.387273] [] __do_softirq+0xe0/0x420 [22766.387275] [] call_softirq+0x1c/0x30 [22766.387278] [] do_softirq+0xd5/0x110 [22766.387279] [] irq_exit+0xd5/0xe0 [22766.387281] [] do_IRQ+0x63/0xd0 [22766.387283] [] common_interrupt+0x6f/0x6f [22766.387283] [22766.387284] [22766.387285] [] ? retint_swapgs+0x13/0x1b [22766.387285] Code: c0 90 5d c3 66 0f 1f 44 00 00 4c 89 c8 5d c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 5d e8 4c 89 65 f0 4c 89 6d f8 66 66 66 66 90 <0f> b7 87 98 00 00 00 48 89 fb 49 89 f5 66 c1 c0 08 66 39 46 02 [22766.387307] [22766.387307] RIP [22766.387311] [] sctp_assoc_is_match+0x19/0x90 [sctp] [22766.387311] RSP [22766.387142] ffffffffa16ab120 [22766.599537] ---[ end trace 3f6dae82e37b17f5 ]--- [22766.601221] Kernel panic - not syncing: Fatal exception in interrupt It appears from his analysis and some staring at the code that this is likely occuring because an association is getting freed while still on the sctp_assoc_hashtable. As a result, we get a gpf when traversing the hashtable while a freed node corrupts part of the list. Nominally I would think that an mibalanced refcount was responsible for this, but I can't seem to find any obvious imbalance. What I did note however was that the two places where we create an association using sctp_primitive_ASSOCIATE (__sctp_connect and sctp_sendmsg), have failure paths which free a newly created association after calling sctp_primitive_ASSOCIATE. sctp_primitive_ASSOCIATE brings us into the sctp_sf_do_prm_asoc path, which issues a SCTP_CMD_NEW_ASOC side effect, which in turn adds a new association to the aforementioned hash table. the sctp command interpreter that process side effects has not way to unwind previously processed commands, so freeing the association from the __sctp_connect or sctp_sendmsg error path would lead to a freed association remaining on this hash table. I've fixed this but modifying sctp_[un]hash_established to use hlist_del_init, which allows us to proerly use hlist_unhashed to check if the node is on a hashlist safely during a delete. That in turn alows us to safely call sctp_unhash_established in the __sctp_connect and sctp_sendmsg error paths before freeing them, regardles of what the associations state is on the hash list. I noted, while I was doing this, that the __sctp_unhash_endpoint was using hlist_unhsashed in a simmilar fashion, but never nullified any removed nodes pointers to make that function work properly, so I fixed that up in a simmilar fashion. I attempted to test this using a virtual guest running the SCTP_RR test from netperf in a loop while running the trinity fuzzer, both in a loop. I wasn't able to recreate the problem prior to this fix, nor was I able to trigger the failure after (neither of which I suppose is suprising). Given the trace above however, I think its likely that this is what we hit. Signed-off-by: Neil Horman Reported-by: davej@redhat.com CC: davej@redhat.com CC: "David S. Miller" CC: Vlad Yasevich CC: Sridhar Samudrala CC: linux-sctp@vger.kernel.org Signed-off-by: David S. Miller --- net/sctp/input.c | 7 ++----- net/sctp/socket.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 80564fe03024..8b9b6790a3df 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -736,15 +736,12 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) epb = &ep->base; - if (hlist_unhashed(&epb->node)) - return; - epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - __hlist_del(&epb->node); + hlist_del_init(&epb->node); sctp_write_unlock(&head->lock); } @@ -825,7 +822,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc) head = &sctp_assoc_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - __hlist_del(&epb->node); + hlist_del_init(&epb->node); sctp_write_unlock(&head->lock); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b3b8a8d813eb..31c7bfcd9b58 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1231,8 +1231,14 @@ out_free: SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p" " kaddrs: %p err: %d\n", asoc, kaddrs, err); - if (asoc) + if (asoc) { + /* sctp_primitive_ASSOCIATE may have added this association + * To the hash table, try to unhash it, just in case, its a noop + * if it wasn't hashed so we're safe + */ + sctp_unhash_established(asoc); sctp_association_free(asoc); + } return err; } @@ -1942,8 +1948,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; out_free: - if (new_asoc) + if (new_asoc) { + sctp_unhash_established(asoc); sctp_association_free(asoc); + } out_unlock: sctp_release_sock(sk); -- cgit v1.2.3 From 936597631dd310e220544dc5c6075d924efd39b2 Mon Sep 17 00:00:00 2001 From: Narendra K Date: Mon, 16 Jul 2012 15:24:41 +0000 Subject: ixgbevf: Prevent RX/TX statistics getting reset to zero The commit 4197aa7bb81877ebb06e4f2cc1b5fea2da23a7bd implements 64 bit per ring statistics. But the driver resets the 'total_bytes' and 'total_packets' from RX and TX rings in the RX and TX interrupt handlers to zero. This results in statistics being lost and user space reporting RX and TX statistics as zero. This patch addresses the issue by preventing the resetting of RX and TX ring statistics to zero. Signed-off-by: Narendra K Tested-by: Sibai Li Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index f69ec4288b10..8b304a43a22d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -969,8 +969,6 @@ static irqreturn_t ixgbevf_msix_clean_tx(int irq, void *data) r_idx = find_first_bit(q_vector->txr_idx, adapter->num_tx_queues); for (i = 0; i < q_vector->txr_count; i++) { tx_ring = &(adapter->tx_ring[r_idx]); - tx_ring->total_bytes = 0; - tx_ring->total_packets = 0; ixgbevf_clean_tx_irq(adapter, tx_ring); r_idx = find_next_bit(q_vector->txr_idx, adapter->num_tx_queues, r_idx + 1); @@ -994,16 +992,6 @@ static irqreturn_t ixgbevf_msix_clean_rx(int irq, void *data) struct ixgbe_hw *hw = &adapter->hw; struct ixgbevf_ring *rx_ring; int r_idx; - int i; - - r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); - for (i = 0; i < q_vector->rxr_count; i++) { - rx_ring = &(adapter->rx_ring[r_idx]); - rx_ring->total_bytes = 0; - rx_ring->total_packets = 0; - r_idx = find_next_bit(q_vector->rxr_idx, adapter->num_rx_queues, - r_idx + 1); - } if (!q_vector->rxr_count) return IRQ_HANDLED; -- cgit v1.2.3 From ef209f15980360f6945873df3cd710c5f62f2a3e Mon Sep 17 00:00:00 2001 From: Gao feng Date: Wed, 11 Jul 2012 21:50:15 +0000 Subject: net: cgroup: fix access the unallocated memory in netprio cgroup there are some out of bound accesses in netprio cgroup. now before accessing the dev->priomap.priomap array,we only check if the dev->priomap exist.and because we don't want to see additional bound checkings in fast path, so we should make sure that dev->priomap is null or array size of dev->priomap.priomap is equal to max_prioidx + 1; so in write_priomap logic,we should call extend_netdev_table when dev->priomap is null and dev->priomap.priomap_len < max_len. and in cgrp_create->update_netdev_tables logic,we should call extend_netdev_table only when dev->priomap exist and dev->priomap.priomap_len < max_len. and it's not needed to call update_netdev_tables in write_priomap, we can only allocate the net device's priomap which we change through net_prio.ifpriomap. this patch also add a return value for update_netdev_tables & extend_netdev_table, so when new_priomap is allocated failed, write_priomap will stop to access the priomap,and return -ENOMEM back to the userspace to tell the user what happend. Change From v3: 1. add rtnl protect when reading max_prioidx in write_priomap. 2. only call extend_netdev_table when map->priomap_len < max_len, this will make sure array size of dev->map->priomap always bigger than any prioidx. 3. add a function write_update_netdev_table to make codes clear. Change From v2: 1. protect extend_netdev_table by RTNL. 2. when extend_netdev_table failed,call dev_put to reduce device's refcount. Signed-off-by: Gao feng Cc: Neil Horman Cc: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 71 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 17 deletions(-) diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 3e953eaddbfc..b2e9caa1ad1a 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -65,7 +65,7 @@ static void put_prioidx(u32 idx) spin_unlock_irqrestore(&prioidx_map_lock, flags); } -static void extend_netdev_table(struct net_device *dev, u32 new_len) +static int extend_netdev_table(struct net_device *dev, u32 new_len) { size_t new_size = sizeof(struct netprio_map) + ((sizeof(u32) * new_len)); @@ -77,7 +77,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len) if (!new_priomap) { pr_warn("Unable to alloc new priomap!\n"); - return; + return -ENOMEM; } for (i = 0; @@ -90,46 +90,79 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len) rcu_assign_pointer(dev->priomap, new_priomap); if (old_priomap) kfree_rcu(old_priomap, rcu); + return 0; } -static void update_netdev_tables(void) +static int write_update_netdev_table(struct net_device *dev) { + int ret = 0; + u32 max_len; + struct netprio_map *map; + + rtnl_lock(); + max_len = atomic_read(&max_prioidx) + 1; + map = rtnl_dereference(dev->priomap); + if (!map || map->priomap_len < max_len) + ret = extend_netdev_table(dev, max_len); + rtnl_unlock(); + + return ret; +} + +static int update_netdev_tables(void) +{ + int ret = 0; struct net_device *dev; - u32 max_len = atomic_read(&max_prioidx) + 1; + u32 max_len; struct netprio_map *map; rtnl_lock(); + max_len = atomic_read(&max_prioidx) + 1; for_each_netdev(&init_net, dev) { map = rtnl_dereference(dev->priomap); - if ((!map) || - (map->priomap_len < max_len)) - extend_netdev_table(dev, max_len); + /* + * don't allocate priomap if we didn't + * change net_prio.ifpriomap (map == NULL), + * this will speed up skb_update_prio. + */ + if (map && map->priomap_len < max_len) { + ret = extend_netdev_table(dev, max_len); + if (ret < 0) + break; + } } rtnl_unlock(); + return ret; } static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; - int ret; + int ret = -EINVAL; cs = kzalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); - if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) { - kfree(cs); - return ERR_PTR(-EINVAL); - } + if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) + goto out; ret = get_prioidx(&cs->prioidx); - if (ret != 0) { + if (ret < 0) { pr_warn("No space in priority index array\n"); - kfree(cs); - return ERR_PTR(ret); + goto out; + } + + ret = update_netdev_tables(); + if (ret < 0) { + put_prioidx(cs->prioidx); + goto out; } return &cs->css; +out: + kfree(cs); + return ERR_PTR(ret); } static void cgrp_destroy(struct cgroup *cgrp) @@ -221,13 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, if (!dev) goto out_free_devname; - update_netdev_tables(); - ret = 0; + ret = write_update_netdev_table(dev); + if (ret < 0) + goto out_put_dev; + rcu_read_lock(); map = rcu_dereference(dev->priomap); if (map) map->priomap[prioidx] = priority; rcu_read_unlock(); + +out_put_dev: dev_put(dev); out_free_devname: -- cgit v1.2.3 From 96f80d123eff05c3cd4701463786b87952a6c3ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= Date: Sun, 15 Jul 2012 10:10:14 +0000 Subject: caif: Fix access to freed pernet memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit unregister_netdevice_notifier() must be called before unregister_pernet_subsys() to avoid accessing already freed pernet memory. This fixes the following oops when doing rmmod: Call Trace: [] caif_device_notify+0x4d/0x5a0 [caif] [] unregister_netdevice_notifier+0xb9/0x100 [] caif_device_exit+0x1c/0x250 [caif] [] sys_delete_module+0x1a4/0x300 [] ? trace_hardirqs_on_caller+0x15d/0x1e0 [] ? trace_hardirqs_on_thunk+0x3a/0x3 [] system_call_fastpath+0x1a/0x1f RIP [] caif_get+0x51/0xb0 [caif] Signed-off-by: Sjur Brændeland Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/caif/caif_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 554b31289607..8c83c175b03a 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -561,9 +561,9 @@ static int __init caif_device_init(void) static void __exit caif_device_exit(void) { - unregister_pernet_subsys(&caif_net_ops); unregister_netdevice_notifier(&caif_device_notifier); dev_remove_pack(&caif_packet_type); + unregister_pernet_subsys(&caif_net_ops); } module_init(caif_device_init); -- cgit v1.2.3 From 68653359beb59014b88e4f297e467730a1680996 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 13 Jul 2012 20:15:34 +0000 Subject: MAINTAINERS: reflect actual changes in IEEE 802.15.4 maintainership As the life flows, developers priorities shifts a bit. Reflect actual changes in the maintainership of IEEE 802.15.4 code: Sergey mostly stopped cared about this piece of code. Most of the work recently was done by Alexander, so put him to the MAINTAINERS file to reflect his status and to ease the life of respective patches. Also add new net/mac802154/ directory to the list of maintained files. Signed-off-by: Dmitry Eremin-Solenikov Cc: Alexander Smirnov Signed-off-by: David S. Miller --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 03df1d15ebf3..3064a9c3b3e7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3433,13 +3433,14 @@ S: Supported F: drivers/idle/i7300_idle.c IEEE 802.15.4 SUBSYSTEM +M: Alexander Smirnov M: Dmitry Eremin-Solenikov -M: Sergey Lapin L: linux-zigbee-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://apps.sourceforge.net/trac/linux-zigbee T: git git://git.kernel.org/pub/scm/linux/kernel/git/lowpan/lowpan.git S: Maintained F: net/ieee802154/ +F: net/mac802154/ F: drivers/ieee802154/ IIO SUBSYSTEM AND DRIVERS -- cgit v1.2.3 From ef764a13b8f4bc4b532a51bc37c35b3974831b29 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 13 Jul 2012 06:33:08 +0000 Subject: ax25: Fix missing break At least there seems to be no reason to disallow ROSE sockets when NETROM is loaded. Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 051f7abae66d..779095ded689 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -842,6 +842,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, case AX25_P_NETROM: if (ax25_protocol_is_registered(AX25_P_NETROM)) return -ESOCKTNOSUPPORT; + break; #endif #ifdef CONFIG_ROSE_MODULE case AX25_P_ROSE: -- cgit v1.2.3 From 10cc1bdd5ef65f60f570b594c4c066d763c128fb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 16 Jul 2012 23:44:48 +0000 Subject: ixgbevf: Fix panic when loading driver This patch addresses a kernel panic seen when setting up the interface. Specifically we see a NULL pointer dereference on the Tx descriptor cleanup path when enabling interrupts. This change corrects that so it cannot occur. Signed-off-by: Alexander Duyck Acked-by: Greg Rose Tested-by: Sibai Li Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 8b304a43a22d..41e32257a4e8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -201,6 +201,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_adapter *adapter, unsigned int i, eop, count = 0; unsigned int total_bytes = 0, total_packets = 0; + if (test_bit(__IXGBEVF_DOWN, &adapter->state)) + return true; + i = tx_ring->next_to_clean; eop = tx_ring->tx_buffer_info[i].next_to_watch; eop_desc = IXGBE_TX_DESC_ADV(*tx_ring, eop); -- cgit v1.2.3 From 9e33ce453f8ac8452649802bee1f410319408f4b Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sat, 7 Jul 2012 18:26:10 +0800 Subject: ipvs: fix oops on NAT reply in br_nf context IPVS should not reset skb->nf_bridge in FORWARD hook by calling nf_reset for NAT replies. It triggers oops in br_nf_forward_finish. [ 579.781508] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 579.781669] IP: [] br_nf_forward_finish+0x58/0x112 [ 579.781792] PGD 218f9067 PUD 0 [ 579.781865] Oops: 0000 [#1] SMP [ 579.781945] CPU 0 [ 579.781983] Modules linked in: [ 579.782047] [ 579.782080] [ 579.782114] Pid: 4644, comm: qemu Tainted: G W 3.5.0-rc5-00006-g95e69f9 #282 Hewlett-Packard /30E8 [ 579.782300] RIP: 0010:[] [] br_nf_forward_finish+0x58/0x112 [ 579.782455] RSP: 0018:ffff88007b003a98 EFLAGS: 00010287 [ 579.782541] RAX: 0000000000000008 RBX: ffff8800762ead00 RCX: 000000000001670a [ 579.782653] RDX: 0000000000000000 RSI: 000000000000000a RDI: ffff8800762ead00 [ 579.782845] RBP: ffff88007b003ac8 R08: 0000000000016630 R09: ffff88007b003a90 [ 579.782957] R10: ffff88007b0038e8 R11: ffff88002da37540 R12: ffff88002da01a02 [ 579.783066] R13: ffff88002da01a80 R14: ffff88002d83c000 R15: ffff88002d82a000 [ 579.783177] FS: 0000000000000000(0000) GS:ffff88007b000000(0063) knlGS:00000000f62d1b70 [ 579.783306] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [ 579.783395] CR2: 0000000000000004 CR3: 00000000218fe000 CR4: 00000000000027f0 [ 579.783505] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 579.783684] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 579.783795] Process qemu (pid: 4644, threadinfo ffff880021b20000, task ffff880021aba760) [ 579.783919] Stack: [ 579.783959] ffff88007693cedc ffff8800762ead00 ffff88002da01a02 ffff8800762ead00 [ 579.784110] ffff88002da01a02 ffff88002da01a80 ffff88007b003b18 ffffffff817b26c7 [ 579.784260] ffff880080000000 ffffffff81ef59f0 ffff8800762ead00 ffffffff81ef58b0 [ 579.784477] Call Trace: [ 579.784523] [ 579.784562] [ 579.784603] [] br_nf_forward_ip+0x275/0x2c8 [ 579.784707] [] nf_iterate+0x47/0x7d [ 579.784797] [] ? br_dev_queue_push_xmit+0xae/0xae [ 579.784906] [] nf_hook_slow+0x6d/0x102 [ 579.784995] [] ? br_dev_queue_push_xmit+0xae/0xae [ 579.785175] [] ? _raw_write_unlock_bh+0x19/0x1b [ 579.785179] [] __br_forward+0x97/0xa2 [ 579.785179] [] br_handle_frame_finish+0x1a6/0x257 [ 579.785179] [] br_nf_pre_routing_finish+0x26d/0x2cb [ 579.785179] [] br_nf_pre_routing+0x55d/0x5c1 [ 579.785179] [] nf_iterate+0x47/0x7d [ 579.785179] [] ? br_handle_local_finish+0x44/0x44 [ 579.785179] [] nf_hook_slow+0x6d/0x102 [ 579.785179] [] ? br_handle_local_finish+0x44/0x44 [ 579.785179] [] ? sky2_poll+0xb35/0xb54 [ 579.785179] [] br_handle_frame+0x213/0x229 [ 579.785179] [] ? br_handle_frame_finish+0x257/0x257 [ 579.785179] [] __netif_receive_skb+0x2b4/0x3f1 [ 579.785179] [] process_backlog+0x99/0x1e2 [ 579.785179] [] net_rx_action+0xdf/0x242 [ 579.785179] [] __do_softirq+0xc1/0x1e0 [ 579.785179] [] ? trace_hardirqs_off_thunk+0x3a/0x6c [ 579.785179] [] call_softirq+0x1c/0x30 The steps to reproduce as follow, 1. On Host1, setup brige br0(192.168.1.106) 2. Boot a kvm guest(192.168.1.105) on Host1 and start httpd 3. Start IPVS service on Host1 ipvsadm -A -t 192.168.1.106:80 -s rr ipvsadm -a -t 192.168.1.106:80 -r 192.168.1.105:80 -m 4. Run apache benchmark on Host2(192.168.1.101) ab -n 1000 http://192.168.1.106/ ip_vs_reply4 ip_vs_out handle_response ip_vs_notrack nf_reset() { skb->nf_bridge = NULL; } Actually, IPVS wants in this case just to replace nfct with untracked version. So replace the nf_reset(skb) call in ip_vs_notrack() with a nf_conntrack_put(skb->nfct) call. Signed-off-by: Lin Ming Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d6146b4811c2..95374d1696a1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1425,7 +1425,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb) struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (!ct || !nf_ct_is_untracked(ct)) { - nf_reset(skb); + nf_conntrack_put(skb->nfct); skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); -- cgit v1.2.3 From 283283c4da91adc44b03519f434ee1e7e91d6fdb Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 7 Jul 2012 20:30:11 +0300 Subject: ipvs: fix oops in ip_vs_dst_event on rmmod After commit 39f618b4fd95ae243d940ec64c961009c74e3333 (3.4) "ipvs: reset ipvs pointer in netns" we can oops in ip_vs_dst_event on rmmod ip_vs because ip_vs_control_cleanup is called after the ipvs_core_ops subsys is unregistered and net->ipvs is NULL. Fix it by exiting early from ip_vs_dst_event if ipvs is NULL. It is safe because all services and dests for the net are already freed. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d43e3c122f7b..84444dda194b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1521,11 +1521,12 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; struct net *net = dev_net(dev); + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_service *svc; struct ip_vs_dest *dest; unsigned int idx; - if (event != NETDEV_UNREGISTER) + if (event != NETDEV_UNREGISTER || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); EnterFunction(2); @@ -1551,7 +1552,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } } - list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) { + list_for_each_entry(dest, &ipvs->dest_trash, n_list) { __ip_vs_dev_reset(dest, dev); } mutex_unlock(&__ip_vs_mutex); -- cgit v1.2.3 From 9634252617441991b01dacaf4040866feecaf36f Mon Sep 17 00:00:00 2001 From: Federico Fuga Date: Mon, 16 Jul 2012 10:36:51 +0300 Subject: rpmsg: fix dependency on initialization order When rpmsg drivers are built into the kernel, they must not initialize before the rpmsg bus does, otherwise they'd trigger a BUG() in drivers/base/driver.c line 169 (driver_register()). To fix that, and to stop depending on arbitrary linkage ordering of those built-in rpmsg drivers, we make the rpmsg bus initialize at subsys_initcall. Cc: stable Signed-off-by: Federico Fuga [ohad: rewrite the commit log] Signed-off-by: Ohad Ben-Cohen --- drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 39d3aa41adda..f56c8ba3a861 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -1085,7 +1085,7 @@ static int __init rpmsg_init(void) return ret; } -module_init(rpmsg_init); +subsys_initcall(rpmsg_init); static void __exit rpmsg_fini(void) { -- cgit v1.2.3 From d9914cf66181b8aa0929775f5c6f675c6ebc3eb5 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Tue, 17 Jul 2012 21:37:27 +0200 Subject: PM: Rename CAP_EPOLLWAKEUP to CAP_BLOCK_SUSPEND As discussed in http://thread.gmane.org/gmane.linux.kernel/1249726/focus=1288990, the capability introduced in 4d7e30d98939a0340022ccd49325a3d70f7e0238 to govern EPOLLWAKEUP seems misnamed: this capability is about governing the ability to suspend the system, not using a particular API flag (EPOLLWAKEUP). We should make the name of the capability more general to encourage reuse in related cases. (Whether or not this capability should also be used to govern the use of /sys/power/wake_lock is a question that needs to be separately resolved.) This patch renames the capability to CAP_BLOCK_SUSPEND. In order to ensure that the old capability name doesn't make it out into the wild, could you please apply and push up the tree to ensure that it is incorporated for the 3.5 release. Signed-off-by: Michael Kerrisk Acked-by: Serge Hallyn Signed-off-by: Rafael J. Wysocki --- fs/eventpoll.c | 2 +- include/linux/capability.h | 6 +++--- include/linux/eventpoll.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 74598f67efeb..1c8b55670804 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) epds.events &= ~EPOLLWAKEUP; /* diff --git a/include/linux/capability.h b/include/linux/capability.h index 68d56effc328..d10b7ed595b1 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,11 +360,11 @@ struct cpu_vfs_cap_data { #define CAP_WAKE_ALARM 35 -/* Allow preventing system suspends while epoll events are pending */ +/* Allow preventing system suspends */ -#define CAP_EPOLLWAKEUP 36 +#define CAP_BLOCK_SUSPEND 36 -#define CAP_LAST_CAP CAP_EPOLLWAKEUP +#define CAP_LAST_CAP CAP_BLOCK_SUSPEND #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 6f8be328770a..f4bb378ccf6a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -34,7 +34,7 @@ * re-allowed until epoll_wait is called again after consuming the wakeup * event(s). * - * Requires CAP_EPOLLWAKEUP + * Requires CAP_BLOCK_SUSPEND */ #define EPOLLWAKEUP (1 << 29) -- cgit v1.2.3 From c8f4a2d095bcb7ff798f984b9c7d16b4c8d194c3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 17 Jul 2012 15:47:51 -0700 Subject: bootmem: make ___alloc_bootmem_node_nopanic() really nopanic In reaction to commit 99ab7b19440a ("mm: sparse: fix usemap allocation above node descriptor section") Johannes said: | while backporting the below patch, I realised that your fix busted | f5bf18fa22f8 again. The problem was not a panicking version on | allocation failure but when the usemap size was too large such that | goal + size > limit triggers the BUG_ON in the bootmem allocator. So | we need a version that passes limit ONLY if the usemap is smaller than | the section. after checking the code, the name of ___alloc_bootmem_node_nopanic() does not reflect the fact. Make bootmem really not panic. Hope will kill bootmem sooner. Signed-off-by: Yinghai Lu Cc: Johannes Weiner Cc: [3.3.x, 3.4.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/bootmem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/bootmem.c b/mm/bootmem.c index 73096630cb35..bcb63ac48cc5 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -710,6 +710,10 @@ again: if (ptr) return ptr; + /* do not panic in alloc_bootmem_bdata() */ + if (limit && goal + size > limit) + limit = 0; + ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit); if (ptr) return ptr; -- cgit v1.2.3 From b45f9330b4e08ee4de5e2f5cae75200edb283a96 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Jul 2012 15:47:54 -0700 Subject: mn10300: fix "pull clearing RESTORE_SIGMASK into block_sigmask()" fallout Commit a610d6e672d6 ("pull clearing RESTORE_SIGMASK into block_sigmask()") caused: arch/mn10300/kernel/signal.c: In function 'handle_signal': arch/mn10300/kernel/signal.c:462:3: warning: 'return' with no value, in function returning non-void [-Wreturn-type] Add the missing return values, and restore the indentation while we're at it. Signed-off-by: Geert Uytterhoeven Cc: Al Viro Cc: David Howells Cc: Koichi Yasutake Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mn10300/kernel/signal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index 6ab0bee2a54f..4d584ae29ae1 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -459,10 +459,11 @@ static int handle_signal(int sig, else ret = setup_frame(sig, ka, oldset, regs); if (ret) - return; + return ret; signal_delivered(sig, info, ka, regs, - test_thread_flag(TIF_SINGLESTEP)); + test_thread_flag(TIF_SINGLESTEP)); + return 0; } /* -- cgit v1.2.3 From 07f604ccfdff2afe8224aee9322080a7c4c2014c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Jul 2012 15:47:57 -0700 Subject: m32r: remove duplicate definition of PTRACE_O_TRACESYSGOOD Fix the m32r build warning: include/linux/ptrace.h:66:0: warning: "PTRACE_O_TRACESYSGOOD" redefined [enabled by default] arch/m32r/include/asm/ptrace.h:117:0: note: this is the location of the previous definition We already have it in , so remove it from Signed-off-by: Geert Uytterhoeven Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/include/asm/ptrace.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/m32r/include/asm/ptrace.h b/arch/m32r/include/asm/ptrace.h index 527527584dd0..4313aa62b51b 100644 --- a/arch/m32r/include/asm/ptrace.h +++ b/arch/m32r/include/asm/ptrace.h @@ -113,9 +113,6 @@ struct pt_regs { #define PTRACE_OLDSETOPTIONS 21 -/* options set using PTRACE_SETOPTIONS */ -#define PTRACE_O_TRACESYSGOOD 0x00000001 - #ifdef __KERNEL__ #include /* M32R_PSW_BSM, M32R_PSW_BPM */ -- cgit v1.2.3 From f9717f3110c8d05ad65b9a91b698cc0612fd77af Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Jul 2012 15:47:59 -0700 Subject: m32r: fix pull clearing RESTORE_SIGMASK into block_sigmask() fallout Commit a610d6e672d6 ("pull clearing RESTORE_SIGMASK into block_sigmask()") caused: arch/m32r/kernel/signal.c: In function 'handle_signal': arch/m32r/kernel/signal.c:289:6: warning: 'return' with a value, in function returning void [enabled by default] Remove the return value it forgot to remove. Signed-off-by: Geert Uytterhoeven Cc: Al Viro Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index f3fb2c029cfc..d0f60b97bbc5 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -286,7 +286,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, case -ERESTARTNOINTR: regs->r0 = regs->orig_r0; if (prev_insn(regs) < 0) - return -EFAULT; + return; } } -- cgit v1.2.3 From a6b202979661eb32646048aeaad7be7b70c2cd22 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Jul 2012 15:48:00 -0700 Subject: m32r: fix 'fix breakage from "m32r: use generic ptrace_resume code"' fallout Commit acdc0d5ef9dd ('m32r: fix breakage from "m32r: use generic ptrace_resume code"') tried to fix a problem in commit e34112e3966fc ("m32r: use generic ptrace_resume code") by returning values in a function returning void, causing: arch/m32r/kernel/ptrace.c: In function 'user_enable_single_step': arch/m32r/kernel/ptrace.c:594:3: warning: 'return' with a value, in function returning void [enabled by default] arch/m32r/kernel/ptrace.c:598:3: warning: 'return' with a value, in function returning void [enabled by default] arch/m32r/kernel/ptrace.c:601:3: warning: 'return' with a value, in function returning void [enabled by default] arch/m32r/kernel/ptrace.c:604:2: warning: 'return' with a value, in function returning void [enabled by default] Remove the unneeded return values. Signed-off-by: Geert Uytterhoeven Cc: Al Viro Cc: Christoph Hellwig Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/ptrace.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 4c03361537aa..51f5e9aa4901 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -591,17 +591,16 @@ void user_enable_single_step(struct task_struct *child) if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0) != sizeof(insn)) - return -EIO; + return; compute_next_pc(insn, pc, &next_pc, child); if (next_pc & 0x80000000) - return -EIO; + return; if (embed_debug_trap(child, next_pc)) - return -EIO; + return; invalidate_cache(); - return 0; } void user_disable_single_step(struct task_struct *child) -- cgit v1.2.3 From df12aef6a19bb2d69859a94936bda0e6ccaf3327 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Jul 2012 15:48:02 -0700 Subject: m32r: consistently use "suffix-$(...)" Commit a556bec9955c ("m32r: fix arch/m32r/boot/compressed/Makefile") changed "$(suffix_y)" to "$(suffix-y)", but didn't update any location where "suffix_y" is set, causing: make[5]: *** No rule to make target `arch/m32r/boot/compressed/vmlinux.bin.', needed by `arch/m32r/boot/compressed/piggy.o'. Stop. make[4]: *** [arch/m32r/boot/compressed/vmlinux] Error 2 make[3]: *** [zImage] Error 2 Correct the other locations to fix this. Signed-off-by: Geert Uytterhoeven Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/boot/compressed/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/m32r/boot/compressed/Makefile b/arch/m32r/boot/compressed/Makefile index 177716b1d613..01729c2979ba 100644 --- a/arch/m32r/boot/compressed/Makefile +++ b/arch/m32r/boot/compressed/Makefile @@ -43,9 +43,9 @@ endif OBJCOPYFLAGS += -R .empty_zero_page -suffix_$(CONFIG_KERNEL_GZIP) = gz -suffix_$(CONFIG_KERNEL_BZIP2) = bz2 -suffix_$(CONFIG_KERNEL_LZMA) = lzma +suffix-$(CONFIG_KERNEL_GZIP) = gz +suffix-$(CONFIG_KERNEL_BZIP2) = bz2 +suffix-$(CONFIG_KERNEL_LZMA) = lzma $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE $(call if_changed,ld) -- cgit v1.2.3 From a8abbca6617e1caa2344d2d38d0a35f3e5928b79 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Jul 2012 15:48:04 -0700 Subject: m32r: add memcpy() for CONFIG_KERNEL_GZIP=y Fix the m32r link error: LD arch/m32r/boot/compressed/vmlinux arch/m32r/boot/compressed/misc.o: In function `zlib_updatewindow': misc.c:(.text+0x190): undefined reference to `memcpy' misc.c:(.text+0x190): relocation truncated to fit: R_M32R_26_PLTREL against undefined symbol `memcpy' make[5]: *** [arch/m32r/boot/compressed/vmlinux] Error 1 by adding our own implementation of memcpy(). Signed-off-by: Geert Uytterhoeven Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/boot/compressed/misc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c index 370d60881977..3147aa248b93 100644 --- a/arch/m32r/boot/compressed/misc.c +++ b/arch/m32r/boot/compressed/misc.c @@ -39,6 +39,16 @@ static void *memset(void *s, int c, size_t n) #endif #ifdef CONFIG_KERNEL_GZIP +void *memcpy(void *dest, const void *src, size_t n) +{ + char *d = dest; + const char *s = src; + while (n--) + *d++ = *s++; + + return dest; +} + #define BOOT_HEAP_SIZE 0x10000 #include "../../../../lib/decompress_inflate.c" #endif -- cgit v1.2.3 From 9a75c6e5240f7edc5955e8da5b94bde6f96070b3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Jul 2012 15:48:05 -0700 Subject: m32r: make memset() global for CONFIG_KERNEL_BZIP2=y Fix the m32r compile error: arch/m32r/boot/compressed/misc.c:31:14: error: static declaration of 'memset' follows non-static declaration make[5]: *** [arch/m32r/boot/compressed/misc.o] Error 1 make[4]: *** [arch/m32r/boot/compressed/vmlinux] Error 2 by removing the static keyword. Signed-off-by: Geert Uytterhoeven Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/boot/compressed/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c index 3147aa248b93..28a09529f206 100644 --- a/arch/m32r/boot/compressed/misc.c +++ b/arch/m32r/boot/compressed/misc.c @@ -28,7 +28,7 @@ static unsigned long free_mem_ptr; static unsigned long free_mem_end_ptr; #ifdef CONFIG_KERNEL_BZIP2 -static void *memset(void *s, int c, size_t n) +void *memset(void *s, int c, size_t n) { char *ss = s; -- cgit v1.2.3 From 1c7e7f6c0703d03af6bcd5ccc11fc15d23e5ecbe Mon Sep 17 00:00:00 2001 From: Aaditya Kumar Date: Tue, 17 Jul 2012 15:48:07 -0700 Subject: mm: fix lost kswapd wakeup in kswapd_stop() Offlining memory may block forever, waiting for kswapd() to wake up because kswapd() does not check the event kthread->should_stop before sleeping. The proper pattern, from Documentation/memory-barriers.txt, is: --- waker --- event_indicated = 1; wake_up_process(event_daemon); --- sleeper --- for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (event_indicated) break; schedule(); } set_current_state() may be wrapped by: prepare_to_wait(); In the kswapd() case, event_indicated is kthread->should_stop. === offlining memory (waker) === kswapd_stop() kthread_stop() kthread->should_stop = 1 wake_up_process() wait_for_completion() === kswapd_try_to_sleep (sleeper) === kswapd_try_to_sleep() prepare_to_wait() . . schedule() . . finish_wait() The schedule() needs to be protected by a test of kthread->should_stop, which is wrapped by kthread_should_stop(). Reproducer: Do heavy file I/O in background. Do a memory offline/online in a tight loop Signed-off-by: Aaditya Kumar Acked-by: KOSAKI Motohiro Reviewed-by: Minchan Kim Acked-by: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 661576324c7f..66e431060c05 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2688,7 +2688,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx) * them before going back to sleep. */ set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); - schedule(); + + if (!kthread_should_stop()) + schedule(); + set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); } else { if (remaining) -- cgit v1.2.3 From 5bdca4e0768d3e0f4efa43d9a2cc8210aeb91ab9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jul 2012 11:53:34 -0700 Subject: libceph: fix messenger retry In ancient times, the messenger could both initiate and accept connections. An artifact if that was data structures to store/process an incoming ceph_msg_connect request and send an outgoing ceph_msg_connect_reply. Sadly, the negotiation code was referencing those structures and ignoring important information (like the peer's connect_seq) from the correct ones. Among other things, this fixes tight reconnect loops where the server sends RETRY_SESSION and we (the client) retries with the same connect_seq as last time. This bug pretty easily triggered by injecting socket failures on the MDS and running some fs workload like workunits/direct_io/test_sync_io. Signed-off-by: Sage Weil --- include/linux/ceph/messenger.h | 12 ++---------- net/ceph/messenger.c | 12 ++++++------ 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2521a95fa6d9..44c87e731e9d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -163,16 +163,8 @@ struct ceph_connection { /* connection negotiation temps */ char in_banner[CEPH_BANNER_MAX_LEN]; - union { - struct { /* outgoing connection */ - struct ceph_msg_connect out_connect; - struct ceph_msg_connect_reply in_reply; - }; - struct { /* incoming */ - struct ceph_msg_connect in_connect; - struct ceph_msg_connect_reply out_reply; - }; - }; + struct ceph_msg_connect out_connect; + struct ceph_msg_connect_reply in_reply; struct ceph_entity_addr actual_peer_addr; /* message out temps */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b332c3d76059..10255e81be79 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1423,7 +1423,7 @@ static int process_connect(struct ceph_connection *con) * dropped messages. */ dout("process_connect got RESET peer seq %u\n", - le32_to_cpu(con->in_connect.connect_seq)); + le32_to_cpu(con->in_reply.connect_seq)); pr_err("%s%lld %s connection reset\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr)); @@ -1450,10 +1450,10 @@ static int process_connect(struct ceph_connection *con) * If we sent a smaller connect_seq than the peer has, try * again with a larger value. */ - dout("process_connect got RETRY my seq = %u, peer_seq = %u\n", + dout("process_connect got RETRY_SESSION my seq %u, peer %u\n", le32_to_cpu(con->out_connect.connect_seq), - le32_to_cpu(con->in_connect.connect_seq)); - con->connect_seq = le32_to_cpu(con->in_connect.connect_seq); + le32_to_cpu(con->in_reply.connect_seq)); + con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); ceph_con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) @@ -1468,9 +1468,9 @@ static int process_connect(struct ceph_connection *con) */ dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n", con->peer_global_seq, - le32_to_cpu(con->in_connect.global_seq)); + le32_to_cpu(con->in_reply.global_seq)); get_global_seq(con->msgr, - le32_to_cpu(con->in_connect.global_seq)); + le32_to_cpu(con->in_reply.global_seq)); ceph_con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) -- cgit v1.2.3 From 236df3755d944c33120d77e84b5ff6f3917eb307 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 6 Jun 2012 09:15:33 -0500 Subject: rbd: Fix ceph_snap_context size calculation ceph_snap_context->snaps is an u64 array Signed-off-by: Zheng Yan Reviewed-by: Alex Elder (cherry picked from commit f9f9a1904467816452fc70740165030e84c2c659) --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 65665c9c42c6..8b9c1734d807 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -499,7 +499,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header, / sizeof (*ondisk)) return -EINVAL; header->snapc = kmalloc(sizeof(struct ceph_snap_context) + - snap_count * sizeof (*ondisk), + snap_count * sizeof(u64), gfp_flags); if (!header->snapc) return -ENOMEM; -- cgit v1.2.3 From 6a3ca4f18873f950895cb64ddefafb51a732e3f7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Jun 2012 09:15:33 -0500 Subject: rbd: endian bug in rbd_req_cb() Sparse complains about this because: drivers/block/rbd.c:996:20: warning: cast to restricted __le32 drivers/block/rbd.c:996:20: warning: cast from restricted __le16 These are set in osd_req_encode_op() and they are le16. Signed-off-by: Dan Carpenter Reviewed-by: Alex Elder (cherry picked from commit 895cfcc810e53d7d36639969c71efb9087221167) --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8b9c1734d807..8f428a8ab003 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -977,7 +977,7 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) op = (void *)(replyhead + 1); rc = le32_to_cpu(replyhead->result); bytes = le64_to_cpu(op->extent.length); - read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); + read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); -- cgit v1.2.3 From 5cb6a9bccaa65e0cbf567485ac6d04ca3fdae79c Mon Sep 17 00:00:00 2001 From: Vipul Kumar Samar Date: Tue, 10 Jul 2012 17:12:43 +0530 Subject: clk:spear1340:Fix: Rename clk ids within predefined limit The max limit of con_id is 16 and dev_id is 20. As of now for spear1340, many clk ids are exceeding this predefined limit. This patch rename clk ids like: mux_clk -> _mclk gate_clk -> _gclk synth_clk -> syn_clk gmac_phy -> phy_ gmii_125m_pad_ -> gmii_pad Signed-off-by: Vipul Kumar Samar Signed-off-by: Shiraz Hashim Acked-by: Viresh Kumar Acked-by: Arnd Bergmann --- drivers/clk/spear/spear1340_clock.c | 273 ++++++++++++++++++------------------ 1 file changed, 135 insertions(+), 138 deletions(-) diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c index e3ea72162236..0f2324b9321b 100644 --- a/drivers/clk/spear/spear1340_clock.c +++ b/drivers/clk/spear/spear1340_clock.c @@ -370,26 +370,24 @@ static struct frac_rate_tbl gen_rtbl[] = { /* clock parents */ static const char *vco_parents[] = { "osc_24m_clk", "osc_25m_clk", }; static const char *sys_parents[] = { "none", "pll1_clk", "none", "none", - "sys_synth_clk", "none", "pll2_clk", "pll3_clk", }; -static const char *ahb_parents[] = { "cpu_div3_clk", "amba_synth_clk", }; + "sys_syn_clk", "none", "pll2_clk", "pll3_clk", }; +static const char *ahb_parents[] = { "cpu_div3_clk", "amba_syn_clk", }; static const char *gpt_parents[] = { "osc_24m_clk", "apb_clk", }; static const char *uart0_parents[] = { "pll5_clk", "osc_24m_clk", - "uart0_synth_gate_clk", }; + "uart0_syn_gclk", }; static const char *uart1_parents[] = { "pll5_clk", "osc_24m_clk", - "uart1_synth_gate_clk", }; -static const char *c3_parents[] = { "pll5_clk", "c3_synth_gate_clk", }; -static const char *gmac_phy_input_parents[] = { "gmii_125m_pad_clk", "pll2_clk", + "uart1_syn_gclk", }; +static const char *c3_parents[] = { "pll5_clk", "c3_syn_gclk", }; +static const char *gmac_phy_input_parents[] = { "gmii_pad_clk", "pll2_clk", "osc_25m_clk", }; -static const char *gmac_phy_parents[] = { "gmac_phy_input_mux_clk", - "gmac_phy_synth_gate_clk", }; +static const char *gmac_phy_parents[] = { "phy_input_mclk", "phy_syn_gclk", }; static const char *clcd_synth_parents[] = { "vco1div4_clk", "pll2_clk", }; -static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_synth_clk", }; +static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_syn_clk", }; static const char *i2s_src_parents[] = { "vco1div2_clk", "pll2_clk", "pll3_clk", "i2s_src_pad_clk", }; -static const char *i2s_ref_parents[] = { "i2s_src_mux_clk", "i2s_prs1_clk", }; -static const char *spdif_out_parents[] = { "i2s_src_pad_clk", "gen_synth2_clk", -}; -static const char *spdif_in_parents[] = { "pll2_clk", "gen_synth3_clk", }; +static const char *i2s_ref_parents[] = { "i2s_src_mclk", "i2s_prs1_clk", }; +static const char *spdif_out_parents[] = { "i2s_src_pad_clk", "gen_syn2_clk", }; +static const char *spdif_in_parents[] = { "pll2_clk", "gen_syn3_clk", }; static const char *gen_synth0_1_parents[] = { "vco1div4_clk", "vco3div2_clk", "pll3_clk", }; @@ -415,9 +413,9 @@ void __init spear1340_clk_init(void) 25000000); clk_register_clkdev(clk, "osc_25m_clk", NULL); - clk = clk_register_fixed_rate(NULL, "gmii_125m_pad_clk", NULL, - CLK_IS_ROOT, 125000000); - clk_register_clkdev(clk, "gmii_125m_pad_clk", NULL); + clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT, + 125000000); + clk_register_clkdev(clk, "gmii_pad_clk", NULL); clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL, CLK_IS_ROOT, 12288000); @@ -431,35 +429,35 @@ void __init spear1340_clk_init(void) /* clock derived from 24 or 25 MHz osc clk */ /* vco-pll */ - clk = clk_register_mux(NULL, "vco1_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco1_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG, SPEAR1340_PLL1_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco1_mux_clk", NULL); - clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mux_clk", - 0, SPEAR1340_PLL1_CTR, SPEAR1340_PLL1_FRQ, pll_rtbl, + clk_register_clkdev(clk, "vco1_mclk", NULL); + clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mclk", 0, + SPEAR1340_PLL1_CTR, SPEAR1340_PLL1_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco1_clk", NULL); clk_register_clkdev(clk1, "pll1_clk", NULL); - clk = clk_register_mux(NULL, "vco2_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco2_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG, SPEAR1340_PLL2_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco2_mux_clk", NULL); - clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mux_clk", - 0, SPEAR1340_PLL2_CTR, SPEAR1340_PLL2_FRQ, pll_rtbl, + clk_register_clkdev(clk, "vco2_mclk", NULL); + clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mclk", 0, + SPEAR1340_PLL2_CTR, SPEAR1340_PLL2_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco2_clk", NULL); clk_register_clkdev(clk1, "pll2_clk", NULL); - clk = clk_register_mux(NULL, "vco3_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco3_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG, SPEAR1340_PLL3_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco3_mux_clk", NULL); - clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mux_clk", - 0, SPEAR1340_PLL3_CTR, SPEAR1340_PLL3_FRQ, pll_rtbl, + clk_register_clkdev(clk, "vco3_mclk", NULL); + clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mclk", 0, + SPEAR1340_PLL3_CTR, SPEAR1340_PLL3_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco3_clk", NULL); clk_register_clkdev(clk1, "pll3_clk", NULL); @@ -498,7 +496,7 @@ void __init spear1340_clk_init(void) /* peripherals */ clk_register_fixed_factor(NULL, "thermal_clk", "osc_24m_clk", 0, 1, 128); - clk = clk_register_gate(NULL, "thermal_gate_clk", "thermal_clk", 0, + clk = clk_register_gate(NULL, "thermal_gclk", "thermal_clk", 0, SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_THSENS_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_thermal"); @@ -509,23 +507,23 @@ void __init spear1340_clk_init(void) clk_register_clkdev(clk, "ddr_clk", NULL); /* clock derived from pll1 clk */ - clk = clk_register_frac("sys_synth_clk", "vco1div2_clk", 0, + clk = clk_register_frac("sys_syn_clk", "vco1div2_clk", 0, SPEAR1340_SYS_CLK_SYNT, sys_synth_rtbl, ARRAY_SIZE(sys_synth_rtbl), &_lock); - clk_register_clkdev(clk, "sys_synth_clk", NULL); + clk_register_clkdev(clk, "sys_syn_clk", NULL); - clk = clk_register_frac("amba_synth_clk", "vco1div2_clk", 0, + clk = clk_register_frac("amba_syn_clk", "vco1div2_clk", 0, SPEAR1340_AMBA_CLK_SYNT, amba_synth_rtbl, ARRAY_SIZE(amba_synth_rtbl), &_lock); - clk_register_clkdev(clk, "amba_synth_clk", NULL); + clk_register_clkdev(clk, "amba_syn_clk", NULL); - clk = clk_register_mux(NULL, "sys_mux_clk", sys_parents, + clk = clk_register_mux(NULL, "sys_mclk", sys_parents, ARRAY_SIZE(sys_parents), 0, SPEAR1340_SYS_CLK_CTRL, SPEAR1340_SCLK_SRC_SEL_SHIFT, SPEAR1340_SCLK_SRC_SEL_MASK, 0, &_lock); clk_register_clkdev(clk, "sys_clk", NULL); - clk = clk_register_fixed_factor(NULL, "cpu_clk", "sys_mux_clk", 0, 1, + clk = clk_register_fixed_factor(NULL, "cpu_clk", "sys_mclk", 0, 1, 2); clk_register_clkdev(clk, "cpu_clk", NULL); @@ -548,194 +546,193 @@ void __init spear1340_clk_init(void) clk_register_clkdev(clk, "apb_clk", NULL); /* gpt clocks */ - clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt0_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT0_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt0_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mux_clk", 0, + clk_register_clkdev(clk, "gpt0_mclk", NULL); + clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_GPT0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt0"); - clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt1_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT1_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt1_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, + clk_register_clkdev(clk, "gpt1_mclk", NULL); + clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_GPT1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt1"); - clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt2_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT2_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt2_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, + clk_register_clkdev(clk, "gpt2_mclk", NULL); + clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0, SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_GPT2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt2"); - clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt3_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT3_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt3_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0, + clk_register_clkdev(clk, "gpt3_mclk", NULL); + clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0, SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_GPT3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt3"); /* others */ - clk = clk_register_aux("uart0_synth_clk", "uart0_synth_gate_clk", + clk = clk_register_aux("uart0_syn_clk", "uart0_syn_gclk", "vco1div2_clk", 0, SPEAR1340_UART0_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "uart0_synth_clk", NULL); - clk_register_clkdev(clk1, "uart0_synth_gate_clk", NULL); + clk_register_clkdev(clk, "uart0_syn_clk", NULL); + clk_register_clkdev(clk1, "uart0_syn_gclk", NULL); - clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents, + clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents, ARRAY_SIZE(uart0_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_UART0_CLK_SHIFT, SPEAR1340_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart0_mux_clk", NULL); + clk_register_clkdev(clk, "uart0_mclk", NULL); - clk = clk_register_gate(NULL, "uart0_clk", "uart0_mux_clk", 0, + clk = clk_register_gate(NULL, "uart0_clk", "uart0_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_UART0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "e0000000.serial"); - clk = clk_register_aux("uart1_synth_clk", "uart1_synth_gate_clk", + clk = clk_register_aux("uart1_syn_clk", "uart1_syn_gclk", "vco1div2_clk", 0, SPEAR1340_UART1_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "uart1_synth_clk", NULL); - clk_register_clkdev(clk1, "uart1_synth_gate_clk", NULL); + clk_register_clkdev(clk, "uart1_syn_clk", NULL); + clk_register_clkdev(clk1, "uart1_syn_gclk", NULL); - clk = clk_register_mux(NULL, "uart1_mux_clk", uart1_parents, + clk = clk_register_mux(NULL, "uart1_mclk", uart1_parents, ARRAY_SIZE(uart1_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_UART1_CLK_SHIFT, SPEAR1340_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart1_mux_clk", NULL); + clk_register_clkdev(clk, "uart1_mclk", NULL); - clk = clk_register_gate(NULL, "uart1_clk", "uart1_mux_clk", 0, + clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_UART1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b4100000.serial"); - clk = clk_register_aux("sdhci_synth_clk", "sdhci_synth_gate_clk", + clk = clk_register_aux("sdhci_syn_clk", "sdhci_syn_gclk", "vco1div2_clk", 0, SPEAR1340_SDHCI_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "sdhci_synth_clk", NULL); - clk_register_clkdev(clk1, "sdhci_synth_gate_clk", NULL); + clk_register_clkdev(clk, "sdhci_syn_clk", NULL); + clk_register_clkdev(clk1, "sdhci_syn_gclk", NULL); - clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_syn_gclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_SDHCI_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b3000000.sdhci"); - clk = clk_register_aux("cfxd_synth_clk", "cfxd_synth_gate_clk", - "vco1div2_clk", 0, SPEAR1340_CFXD_CLK_SYNT, NULL, - aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "cfxd_synth_clk", NULL); - clk_register_clkdev(clk1, "cfxd_synth_gate_clk", NULL); + clk = clk_register_aux("cfxd_syn_clk", "cfxd_syn_gclk", "vco1div2_clk", + 0, SPEAR1340_CFXD_CLK_SYNT, NULL, aux_rtbl, + ARRAY_SIZE(aux_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "cfxd_syn_clk", NULL); + clk_register_clkdev(clk1, "cfxd_syn_gclk", NULL); - clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_syn_gclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_CFXD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b2800000.cf"); clk_register_clkdev(clk, NULL, "arasan_xd"); - clk = clk_register_aux("c3_synth_clk", "c3_synth_gate_clk", - "vco1div2_clk", 0, SPEAR1340_C3_CLK_SYNT, NULL, - aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "c3_synth_clk", NULL); - clk_register_clkdev(clk1, "c3_synth_gate_clk", NULL); + clk = clk_register_aux("c3_syn_clk", "c3_syn_gclk", "vco1div2_clk", 0, + SPEAR1340_C3_CLK_SYNT, NULL, aux_rtbl, + ARRAY_SIZE(aux_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "c3_syn_clk", NULL); + clk_register_clkdev(clk1, "c3_syn_gclk", NULL); - clk = clk_register_mux(NULL, "c3_mux_clk", c3_parents, + clk = clk_register_mux(NULL, "c3_mclk", c3_parents, ARRAY_SIZE(c3_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_C3_CLK_SHIFT, SPEAR1340_C3_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "c3_mux_clk", NULL); + clk_register_clkdev(clk, "c3_mclk", NULL); - clk = clk_register_gate(NULL, "c3_clk", "c3_mux_clk", 0, + clk = clk_register_gate(NULL, "c3_clk", "c3_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_C3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "c3"); /* gmac */ - clk = clk_register_mux(NULL, "gmac_phy_input_mux_clk", - gmac_phy_input_parents, + clk = clk_register_mux(NULL, "phy_input_mclk", gmac_phy_input_parents, ARRAY_SIZE(gmac_phy_input_parents), 0, SPEAR1340_GMAC_CLK_CFG, SPEAR1340_GMAC_PHY_INPUT_CLK_SHIFT, SPEAR1340_GMAC_PHY_INPUT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gmac_phy_input_mux_clk", NULL); + clk_register_clkdev(clk, "phy_input_mclk", NULL); - clk = clk_register_aux("gmac_phy_synth_clk", "gmac_phy_synth_gate_clk", - "gmac_phy_input_mux_clk", 0, SPEAR1340_GMAC_CLK_SYNT, - NULL, gmac_rtbl, ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gmac_phy_synth_clk", NULL); - clk_register_clkdev(clk1, "gmac_phy_synth_gate_clk", NULL); + clk = clk_register_aux("phy_syn_clk", "phy_syn_gclk", "phy_input_mclk", + 0, SPEAR1340_GMAC_CLK_SYNT, NULL, gmac_rtbl, + ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "phy_syn_clk", NULL); + clk_register_clkdev(clk1, "phy_syn_gclk", NULL); - clk = clk_register_mux(NULL, "gmac_phy_mux_clk", gmac_phy_parents, + clk = clk_register_mux(NULL, "phy_mclk", gmac_phy_parents, ARRAY_SIZE(gmac_phy_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GMAC_PHY_CLK_SHIFT, SPEAR1340_GMAC_PHY_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, NULL, "stmmacphy.0"); /* clcd */ - clk = clk_register_mux(NULL, "clcd_synth_mux_clk", clcd_synth_parents, + clk = clk_register_mux(NULL, "clcd_syn_mclk", clcd_synth_parents, ARRAY_SIZE(clcd_synth_parents), 0, SPEAR1340_CLCD_CLK_SYNT, SPEAR1340_CLCD_SYNT_CLK_SHIFT, SPEAR1340_CLCD_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "clcd_synth_mux_clk", NULL); + clk_register_clkdev(clk, "clcd_syn_mclk", NULL); - clk = clk_register_frac("clcd_synth_clk", "clcd_synth_mux_clk", 0, + clk = clk_register_frac("clcd_syn_clk", "clcd_syn_mclk", 0, SPEAR1340_CLCD_CLK_SYNT, clcd_rtbl, ARRAY_SIZE(clcd_rtbl), &_lock); - clk_register_clkdev(clk, "clcd_synth_clk", NULL); + clk_register_clkdev(clk, "clcd_syn_clk", NULL); - clk = clk_register_mux(NULL, "clcd_pixel_mux_clk", clcd_pixel_parents, + clk = clk_register_mux(NULL, "clcd_pixel_mclk", clcd_pixel_parents, ARRAY_SIZE(clcd_pixel_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_CLCD_CLK_SHIFT, SPEAR1340_CLCD_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, "clcd_pixel_clk", NULL); - clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mux_clk", 0, + clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_CLCD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "clcd_clk", NULL); /* i2s */ - clk = clk_register_mux(NULL, "i2s_src_mux_clk", i2s_src_parents, + clk = clk_register_mux(NULL, "i2s_src_mclk", i2s_src_parents, ARRAY_SIZE(i2s_src_parents), 0, SPEAR1340_I2S_CLK_CFG, SPEAR1340_I2S_SRC_CLK_SHIFT, SPEAR1340_I2S_SRC_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, "i2s_src_clk", NULL); - clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mux_clk", 0, + clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mclk", 0, SPEAR1340_I2S_CLK_CFG, &i2s_prs1_masks, i2s_prs1_rtbl, ARRAY_SIZE(i2s_prs1_rtbl), &_lock, NULL); clk_register_clkdev(clk, "i2s_prs1_clk", NULL); - clk = clk_register_mux(NULL, "i2s_ref_mux_clk", i2s_ref_parents, + clk = clk_register_mux(NULL, "i2s_ref_mclk", i2s_ref_parents, ARRAY_SIZE(i2s_ref_parents), 0, SPEAR1340_I2S_CLK_CFG, SPEAR1340_I2S_REF_SHIFT, SPEAR1340_I2S_REF_SEL_MASK, 0, &_lock); clk_register_clkdev(clk, "i2s_ref_clk", NULL); - clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mux_clk", 0, + clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mclk", 0, SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_I2S_REF_PAD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "i2s_ref_pad_clk", NULL); - clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gate_clk", - "i2s_ref_mux_clk", 0, SPEAR1340_I2S_CLK_CFG, - &i2s_sclk_masks, i2s_sclk_rtbl, - ARRAY_SIZE(i2s_sclk_rtbl), &_lock, &clk1); + clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gclk", "i2s_ref_mclk", + 0, SPEAR1340_I2S_CLK_CFG, &i2s_sclk_masks, + i2s_sclk_rtbl, ARRAY_SIZE(i2s_sclk_rtbl), &_lock, + &clk1); clk_register_clkdev(clk, "i2s_sclk_clk", NULL); - clk_register_clkdev(clk1, "i2s_sclk_gate_clk", NULL); + clk_register_clkdev(clk1, "i2s_sclk_gclk", NULL); /* clock derived from ahb clk */ clk = clk_register_gate(NULL, "i2c0_clk", "ahb_clk", 0, @@ -800,13 +797,13 @@ void __init spear1340_clk_init(void) &_lock); clk_register_clkdev(clk, "sysram1_clk", NULL); - clk = clk_register_aux("adc_synth_clk", "adc_synth_gate_clk", "ahb_clk", + clk = clk_register_aux("adc_syn_clk", "adc_syn_gclk", "ahb_clk", 0, SPEAR1340_ADC_CLK_SYNT, NULL, adc_rtbl, ARRAY_SIZE(adc_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "adc_synth_clk", NULL); - clk_register_clkdev(clk1, "adc_synth_gate_clk", NULL); + clk_register_clkdev(clk, "adc_syn_clk", NULL); + clk_register_clkdev(clk1, "adc_syn_gclk", NULL); - clk = clk_register_gate(NULL, "adc_clk", "adc_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "adc_clk", "adc_syn_gclk", 0, SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_ADC_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "adc_clk"); @@ -843,39 +840,39 @@ void __init spear1340_clk_init(void) clk_register_clkdev(clk, NULL, "e0300000.kbd"); /* RAS clks */ - clk = clk_register_mux(NULL, "gen_synth0_1_mux_clk", - gen_synth0_1_parents, ARRAY_SIZE(gen_synth0_1_parents), - 0, SPEAR1340_PLL_CFG, SPEAR1340_GEN_SYNT0_1_CLK_SHIFT, + clk = clk_register_mux(NULL, "gen_syn0_1_mclk", gen_synth0_1_parents, + ARRAY_SIZE(gen_synth0_1_parents), 0, SPEAR1340_PLL_CFG, + SPEAR1340_GEN_SYNT0_1_CLK_SHIFT, SPEAR1340_GEN_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gen_synth0_1_clk", NULL); + clk_register_clkdev(clk, "gen_syn0_1_clk", NULL); - clk = clk_register_mux(NULL, "gen_synth2_3_mux_clk", - gen_synth2_3_parents, ARRAY_SIZE(gen_synth2_3_parents), - 0, SPEAR1340_PLL_CFG, SPEAR1340_GEN_SYNT2_3_CLK_SHIFT, + clk = clk_register_mux(NULL, "gen_syn2_3_mclk", gen_synth2_3_parents, + ARRAY_SIZE(gen_synth2_3_parents), 0, SPEAR1340_PLL_CFG, + SPEAR1340_GEN_SYNT2_3_CLK_SHIFT, SPEAR1340_GEN_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gen_synth2_3_clk", NULL); + clk_register_clkdev(clk, "gen_syn2_3_clk", NULL); - clk = clk_register_frac("gen_synth0_clk", "gen_synth0_1_clk", 0, + clk = clk_register_frac("gen_syn0_clk", "gen_syn0_1_clk", 0, SPEAR1340_GEN_CLK_SYNT0, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth0_clk", NULL); + clk_register_clkdev(clk, "gen_syn0_clk", NULL); - clk = clk_register_frac("gen_synth1_clk", "gen_synth0_1_clk", 0, + clk = clk_register_frac("gen_syn1_clk", "gen_syn0_1_clk", 0, SPEAR1340_GEN_CLK_SYNT1, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth1_clk", NULL); + clk_register_clkdev(clk, "gen_syn1_clk", NULL); - clk = clk_register_frac("gen_synth2_clk", "gen_synth2_3_clk", 0, + clk = clk_register_frac("gen_syn2_clk", "gen_syn2_3_clk", 0, SPEAR1340_GEN_CLK_SYNT2, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth2_clk", NULL); + clk_register_clkdev(clk, "gen_syn2_clk", NULL); - clk = clk_register_frac("gen_synth3_clk", "gen_synth2_3_clk", 0, + clk = clk_register_frac("gen_syn3_clk", "gen_syn2_3_clk", 0, SPEAR1340_GEN_CLK_SYNT3, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth3_clk", NULL); + clk_register_clkdev(clk, "gen_syn3_clk", NULL); - clk = clk_register_gate(NULL, "mali_clk", "gen_synth3_clk", 0, + clk = clk_register_gate(NULL, "mali_clk", "gen_syn3_clk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_MALI_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "mali"); @@ -890,74 +887,74 @@ void __init spear1340_clk_init(void) &_lock); clk_register_clkdev(clk, NULL, "spear_cec.1"); - clk = clk_register_mux(NULL, "spdif_out_mux_clk", spdif_out_parents, + clk = clk_register_mux(NULL, "spdif_out_mclk", spdif_out_parents, ARRAY_SIZE(spdif_out_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_SPDIF_OUT_CLK_SHIFT, SPEAR1340_SPDIF_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "spdif_out_mux_clk", NULL); + clk_register_clkdev(clk, "spdif_out_mclk", NULL); - clk = clk_register_gate(NULL, "spdif_out_clk", "spdif_out_mux_clk", 0, + clk = clk_register_gate(NULL, "spdif_out_clk", "spdif_out_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_SPDIF_OUT_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spdif-out"); - clk = clk_register_mux(NULL, "spdif_in_mux_clk", spdif_in_parents, + clk = clk_register_mux(NULL, "spdif_in_mclk", spdif_in_parents, ARRAY_SIZE(spdif_in_parents), 0, SPEAR1340_PERIP_CLK_CFG, SPEAR1340_SPDIF_IN_CLK_SHIFT, SPEAR1340_SPDIF_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "spdif_in_mux_clk", NULL); + clk_register_clkdev(clk, "spdif_in_mclk", NULL); - clk = clk_register_gate(NULL, "spdif_in_clk", "spdif_in_mux_clk", 0, + clk = clk_register_gate(NULL, "spdif_in_clk", "spdif_in_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_SPDIF_IN_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spdif-in"); - clk = clk_register_gate(NULL, "acp_clk", "acp_mux_clk", 0, + clk = clk_register_gate(NULL, "acp_clk", "acp_mclk", 0, SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_ACP_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "acp_clk"); - clk = clk_register_gate(NULL, "plgpio_clk", "plgpio_mux_clk", 0, + clk = clk_register_gate(NULL, "plgpio_clk", "plgpio_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_PLGPIO_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "plgpio"); - clk = clk_register_gate(NULL, "video_dec_clk", "video_dec_mux_clk", 0, + clk = clk_register_gate(NULL, "video_dec_clk", "video_dec_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_DEC_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "video_dec"); - clk = clk_register_gate(NULL, "video_enc_clk", "video_enc_mux_clk", 0, + clk = clk_register_gate(NULL, "video_enc_clk", "video_enc_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_ENC_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "video_enc"); - clk = clk_register_gate(NULL, "video_in_clk", "video_in_mux_clk", 0, + clk = clk_register_gate(NULL, "video_in_clk", "video_in_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_VIDEO_IN_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_vip"); - clk = clk_register_gate(NULL, "cam0_clk", "cam0_mux_clk", 0, + clk = clk_register_gate(NULL, "cam0_clk", "cam0_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_camif.0"); - clk = clk_register_gate(NULL, "cam1_clk", "cam1_mux_clk", 0, + clk = clk_register_gate(NULL, "cam1_clk", "cam1_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_camif.1"); - clk = clk_register_gate(NULL, "cam2_clk", "cam2_mux_clk", 0, + clk = clk_register_gate(NULL, "cam2_clk", "cam2_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_camif.2"); - clk = clk_register_gate(NULL, "cam3_clk", "cam3_mux_clk", 0, + clk = clk_register_gate(NULL, "cam3_clk", "cam3_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_CAM3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_camif.3"); - clk = clk_register_gate(NULL, "pwm_clk", "pwm_mux_clk", 0, + clk = clk_register_gate(NULL, "pwm_clk", "pwm_mclk", 0, SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_PWM_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "pwm"); -- cgit v1.2.3 From e28f1aa110c919716188b979c4404e4c8e9794b9 Mon Sep 17 00:00:00 2001 From: Vipul Kumar Samar Date: Tue, 10 Jul 2012 17:12:44 +0530 Subject: clk:spear1310:Fix: Rename clk ids within predefined limit The max limit of con_id is 16 and dev_id is 20. As of now for spear1310, many clk ids are exceeding this predefined limit. This patch is intended to rename clk ids like: mux_clk -> _mclk gate_clk -> _gclk synth_clk -> syn_clk gmac_phy -> phy_ gmii_125m_pad -> gmii_pad Signed-off-by: Vipul Kumar Samar Signed-off-by: Shiraz Hashim Acked-by: Viresh Kumar Acked-by: Arnd Bergmann --- drivers/clk/spear/spear1310_clock.c | 312 ++++++++++++++++++------------------ 1 file changed, 155 insertions(+), 157 deletions(-) diff --git a/drivers/clk/spear/spear1310_clock.c b/drivers/clk/spear/spear1310_clock.c index 8f05652d53e6..0fcec2aae19c 100644 --- a/drivers/clk/spear/spear1310_clock.c +++ b/drivers/clk/spear/spear1310_clock.c @@ -345,31 +345,30 @@ static struct frac_rate_tbl gen_rtbl[] = { /* clock parents */ static const char *vco_parents[] = { "osc_24m_clk", "osc_25m_clk", }; static const char *gpt_parents[] = { "osc_24m_clk", "apb_clk", }; -static const char *uart0_parents[] = { "pll5_clk", "uart_synth_gate_clk", }; -static const char *c3_parents[] = { "pll5_clk", "c3_synth_gate_clk", }; -static const char *gmac_phy_input_parents[] = { "gmii_125m_pad_clk", "pll2_clk", +static const char *uart0_parents[] = { "pll5_clk", "uart_syn_gclk", }; +static const char *c3_parents[] = { "pll5_clk", "c3_syn_gclk", }; +static const char *gmac_phy_input_parents[] = { "gmii_pad_clk", "pll2_clk", "osc_25m_clk", }; -static const char *gmac_phy_parents[] = { "gmac_phy_input_mux_clk", - "gmac_phy_synth_gate_clk", }; +static const char *gmac_phy_parents[] = { "phy_input_mclk", "phy_syn_gclk", }; static const char *clcd_synth_parents[] = { "vco1div4_clk", "pll2_clk", }; -static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_synth_clk", }; +static const char *clcd_pixel_parents[] = { "pll5_clk", "clcd_syn_clk", }; static const char *i2s_src_parents[] = { "vco1div2_clk", "none", "pll3_clk", "i2s_src_pad_clk", }; -static const char *i2s_ref_parents[] = { "i2s_src_mux_clk", "i2s_prs1_clk", }; +static const char *i2s_ref_parents[] = { "i2s_src_mclk", "i2s_prs1_clk", }; static const char *gen_synth0_1_parents[] = { "vco1div4_clk", "vco3div2_clk", "pll3_clk", }; static const char *gen_synth2_3_parents[] = { "vco1div4_clk", "vco3div2_clk", "pll2_clk", }; static const char *rmii_phy_parents[] = { "ras_tx50_clk", "none", - "ras_pll2_clk", "ras_synth0_clk", }; + "ras_pll2_clk", "ras_syn0_clk", }; static const char *smii_rgmii_phy_parents[] = { "none", "ras_tx125_clk", - "ras_pll2_clk", "ras_synth0_clk", }; -static const char *uart_parents[] = { "ras_apb_clk", "gen_synth3_clk", }; -static const char *i2c_parents[] = { "ras_apb_clk", "gen_synth1_clk", }; -static const char *ssp1_parents[] = { "ras_apb_clk", "gen_synth1_clk", + "ras_pll2_clk", "ras_syn0_clk", }; +static const char *uart_parents[] = { "ras_apb_clk", "gen_syn3_clk", }; +static const char *i2c_parents[] = { "ras_apb_clk", "gen_syn1_clk", }; +static const char *ssp1_parents[] = { "ras_apb_clk", "gen_syn1_clk", "ras_plclk0_clk", }; -static const char *pci_parents[] = { "ras_pll3_clk", "gen_synth2_clk", }; -static const char *tdm_parents[] = { "ras_pll3_clk", "gen_synth1_clk", }; +static const char *pci_parents[] = { "ras_pll3_clk", "gen_syn2_clk", }; +static const char *tdm_parents[] = { "ras_pll3_clk", "gen_syn1_clk", }; void __init spear1310_clk_init(void) { @@ -390,9 +389,9 @@ void __init spear1310_clk_init(void) 25000000); clk_register_clkdev(clk, "osc_25m_clk", NULL); - clk = clk_register_fixed_rate(NULL, "gmii_125m_pad_clk", NULL, - CLK_IS_ROOT, 125000000); - clk_register_clkdev(clk, "gmii_125m_pad_clk", NULL); + clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT, + 125000000); + clk_register_clkdev(clk, "gmii_pad_clk", NULL); clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL, CLK_IS_ROOT, 12288000); @@ -406,34 +405,34 @@ void __init spear1310_clk_init(void) /* clock derived from 24 or 25 MHz osc clk */ /* vco-pll */ - clk = clk_register_mux(NULL, "vco1_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco1_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG, SPEAR1310_PLL1_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco1_mux_clk", NULL); - clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mux_clk", + clk_register_clkdev(clk, "vco1_mclk", NULL); + clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mclk", 0, SPEAR1310_PLL1_CTR, SPEAR1310_PLL1_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco1_clk", NULL); clk_register_clkdev(clk1, "pll1_clk", NULL); - clk = clk_register_mux(NULL, "vco2_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco2_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG, SPEAR1310_PLL2_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco2_mux_clk", NULL); - clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mux_clk", + clk_register_clkdev(clk, "vco2_mclk", NULL); + clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mclk", 0, SPEAR1310_PLL2_CTR, SPEAR1310_PLL2_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco2_clk", NULL); clk_register_clkdev(clk1, "pll2_clk", NULL); - clk = clk_register_mux(NULL, "vco3_mux_clk", vco_parents, + clk = clk_register_mux(NULL, "vco3_mclk", vco_parents, ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG, SPEAR1310_PLL3_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "vco3_mux_clk", NULL); - clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mux_clk", + clk_register_clkdev(clk, "vco3_mclk", NULL); + clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mclk", 0, SPEAR1310_PLL3_CTR, SPEAR1310_PLL3_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco3_clk", NULL); @@ -473,7 +472,7 @@ void __init spear1310_clk_init(void) /* peripherals */ clk_register_fixed_factor(NULL, "thermal_clk", "osc_24m_clk", 0, 1, 128); - clk = clk_register_gate(NULL, "thermal_gate_clk", "thermal_clk", 0, + clk = clk_register_gate(NULL, "thermal_gclk", "thermal_clk", 0, SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_THSENS_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "spear_thermal"); @@ -500,177 +499,176 @@ void __init spear1310_clk_init(void) clk_register_clkdev(clk, "apb_clk", NULL); /* gpt clocks */ - clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt0_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT0_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt0_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mux_clk", 0, + clk_register_clkdev(clk, "gpt0_mclk", NULL); + clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_GPT0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt0"); - clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt1_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT1_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt1_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, + clk_register_clkdev(clk, "gpt1_mclk", NULL); + clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_GPT1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt1"); - clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt2_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT2_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt2_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, + clk_register_clkdev(clk, "gpt2_mclk", NULL); + clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0, SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_GPT2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt2"); - clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt_parents, + clk = clk_register_mux(NULL, "gpt3_mclk", gpt_parents, ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT3_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt3_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0, + clk_register_clkdev(clk, "gpt3_mclk", NULL); + clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0, SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_GPT3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt3"); /* others */ - clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk", - "vco1div2_clk", 0, SPEAR1310_UART_CLK_SYNT, NULL, - aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "uart_synth_clk", NULL); - clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL); + clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "vco1div2_clk", + 0, SPEAR1310_UART_CLK_SYNT, NULL, aux_rtbl, + ARRAY_SIZE(aux_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "uart_syn_clk", NULL); + clk_register_clkdev(clk1, "uart_syn_gclk", NULL); - clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents, + clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents, ARRAY_SIZE(uart0_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_UART_CLK_SHIFT, SPEAR1310_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart0_mux_clk", NULL); + clk_register_clkdev(clk, "uart0_mclk", NULL); - clk = clk_register_gate(NULL, "uart0_clk", "uart0_mux_clk", 0, + clk = clk_register_gate(NULL, "uart0_clk", "uart0_mclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_UART_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "e0000000.serial"); - clk = clk_register_aux("sdhci_synth_clk", "sdhci_synth_gate_clk", + clk = clk_register_aux("sdhci_syn_clk", "sdhci_syn_gclk", "vco1div2_clk", 0, SPEAR1310_SDHCI_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "sdhci_synth_clk", NULL); - clk_register_clkdev(clk1, "sdhci_synth_gate_clk", NULL); + clk_register_clkdev(clk, "sdhci_syn_clk", NULL); + clk_register_clkdev(clk1, "sdhci_syn_gclk", NULL); - clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "sdhci_clk", "sdhci_syn_gclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_SDHCI_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b3000000.sdhci"); - clk = clk_register_aux("cfxd_synth_clk", "cfxd_synth_gate_clk", - "vco1div2_clk", 0, SPEAR1310_CFXD_CLK_SYNT, NULL, - aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "cfxd_synth_clk", NULL); - clk_register_clkdev(clk1, "cfxd_synth_gate_clk", NULL); + clk = clk_register_aux("cfxd_syn_clk", "cfxd_syn_gclk", "vco1div2_clk", + 0, SPEAR1310_CFXD_CLK_SYNT, NULL, aux_rtbl, + ARRAY_SIZE(aux_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "cfxd_syn_clk", NULL); + clk_register_clkdev(clk1, "cfxd_syn_gclk", NULL); - clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "cfxd_clk", "cfxd_syn_gclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_CFXD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b2800000.cf"); clk_register_clkdev(clk, NULL, "arasan_xd"); - clk = clk_register_aux("c3_synth_clk", "c3_synth_gate_clk", - "vco1div2_clk", 0, SPEAR1310_C3_CLK_SYNT, NULL, - aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "c3_synth_clk", NULL); - clk_register_clkdev(clk1, "c3_synth_gate_clk", NULL); + clk = clk_register_aux("c3_syn_clk", "c3_syn_gclk", "vco1div2_clk", + 0, SPEAR1310_C3_CLK_SYNT, NULL, aux_rtbl, + ARRAY_SIZE(aux_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "c3_syn_clk", NULL); + clk_register_clkdev(clk1, "c3_syn_gclk", NULL); - clk = clk_register_mux(NULL, "c3_mux_clk", c3_parents, + clk = clk_register_mux(NULL, "c3_mclk", c3_parents, ARRAY_SIZE(c3_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_C3_CLK_SHIFT, SPEAR1310_C3_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "c3_mux_clk", NULL); + clk_register_clkdev(clk, "c3_mclk", NULL); - clk = clk_register_gate(NULL, "c3_clk", "c3_mux_clk", 0, + clk = clk_register_gate(NULL, "c3_clk", "c3_mclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_C3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "c3"); /* gmac */ - clk = clk_register_mux(NULL, "gmac_phy_input_mux_clk", - gmac_phy_input_parents, + clk = clk_register_mux(NULL, "phy_input_mclk", gmac_phy_input_parents, ARRAY_SIZE(gmac_phy_input_parents), 0, SPEAR1310_GMAC_CLK_CFG, SPEAR1310_GMAC_PHY_INPUT_CLK_SHIFT, SPEAR1310_GMAC_PHY_INPUT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gmac_phy_input_mux_clk", NULL); + clk_register_clkdev(clk, "phy_input_mclk", NULL); - clk = clk_register_aux("gmac_phy_synth_clk", "gmac_phy_synth_gate_clk", - "gmac_phy_input_mux_clk", 0, SPEAR1310_GMAC_CLK_SYNT, - NULL, gmac_rtbl, ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gmac_phy_synth_clk", NULL); - clk_register_clkdev(clk1, "gmac_phy_synth_gate_clk", NULL); + clk = clk_register_aux("phy_syn_clk", "phy_syn_gclk", "phy_input_mclk", + 0, SPEAR1310_GMAC_CLK_SYNT, NULL, gmac_rtbl, + ARRAY_SIZE(gmac_rtbl), &_lock, &clk1); + clk_register_clkdev(clk, "phy_syn_clk", NULL); + clk_register_clkdev(clk1, "phy_syn_gclk", NULL); - clk = clk_register_mux(NULL, "gmac_phy_mux_clk", gmac_phy_parents, + clk = clk_register_mux(NULL, "phy_mclk", gmac_phy_parents, ARRAY_SIZE(gmac_phy_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GMAC_PHY_CLK_SHIFT, SPEAR1310_GMAC_PHY_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, NULL, "stmmacphy.0"); /* clcd */ - clk = clk_register_mux(NULL, "clcd_synth_mux_clk", clcd_synth_parents, + clk = clk_register_mux(NULL, "clcd_syn_mclk", clcd_synth_parents, ARRAY_SIZE(clcd_synth_parents), 0, SPEAR1310_CLCD_CLK_SYNT, SPEAR1310_CLCD_SYNT_CLK_SHIFT, SPEAR1310_CLCD_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "clcd_synth_mux_clk", NULL); + clk_register_clkdev(clk, "clcd_syn_mclk", NULL); - clk = clk_register_frac("clcd_synth_clk", "clcd_synth_mux_clk", 0, + clk = clk_register_frac("clcd_syn_clk", "clcd_syn_mclk", 0, SPEAR1310_CLCD_CLK_SYNT, clcd_rtbl, ARRAY_SIZE(clcd_rtbl), &_lock); - clk_register_clkdev(clk, "clcd_synth_clk", NULL); + clk_register_clkdev(clk, "clcd_syn_clk", NULL); - clk = clk_register_mux(NULL, "clcd_pixel_mux_clk", clcd_pixel_parents, + clk = clk_register_mux(NULL, "clcd_pixel_mclk", clcd_pixel_parents, ARRAY_SIZE(clcd_pixel_parents), 0, SPEAR1310_PERIP_CLK_CFG, SPEAR1310_CLCD_CLK_SHIFT, SPEAR1310_CLCD_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, "clcd_pixel_clk", NULL); - clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mux_clk", 0, + clk = clk_register_gate(NULL, "clcd_clk", "clcd_pixel_mclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_CLCD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "clcd_clk", NULL); /* i2s */ - clk = clk_register_mux(NULL, "i2s_src_mux_clk", i2s_src_parents, + clk = clk_register_mux(NULL, "i2s_src_mclk", i2s_src_parents, ARRAY_SIZE(i2s_src_parents), 0, SPEAR1310_I2S_CLK_CFG, SPEAR1310_I2S_SRC_CLK_SHIFT, SPEAR1310_I2S_SRC_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, "i2s_src_clk", NULL); - clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mux_clk", 0, + clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mclk", 0, SPEAR1310_I2S_CLK_CFG, &i2s_prs1_masks, i2s_prs1_rtbl, ARRAY_SIZE(i2s_prs1_rtbl), &_lock, NULL); clk_register_clkdev(clk, "i2s_prs1_clk", NULL); - clk = clk_register_mux(NULL, "i2s_ref_mux_clk", i2s_ref_parents, + clk = clk_register_mux(NULL, "i2s_ref_mclk", i2s_ref_parents, ARRAY_SIZE(i2s_ref_parents), 0, SPEAR1310_I2S_CLK_CFG, SPEAR1310_I2S_REF_SHIFT, SPEAR1310_I2S_REF_SEL_MASK, 0, &_lock); clk_register_clkdev(clk, "i2s_ref_clk", NULL); - clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mux_clk", 0, + clk = clk_register_gate(NULL, "i2s_ref_pad_clk", "i2s_ref_mclk", 0, SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_I2S_REF_PAD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "i2s_ref_pad_clk", NULL); - clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gate_clk", + clk = clk_register_aux("i2s_sclk_clk", "i2s_sclk_gclk", "i2s_ref_pad_clk", 0, SPEAR1310_I2S_CLK_CFG, &i2s_sclk_masks, i2s_sclk_rtbl, ARRAY_SIZE(i2s_sclk_rtbl), &_lock, &clk1); clk_register_clkdev(clk, "i2s_sclk_clk", NULL); - clk_register_clkdev(clk1, "i2s_sclk_gate_clk", NULL); + clk_register_clkdev(clk1, "i2s_sclk_gclk", NULL); /* clock derived from ahb clk */ clk = clk_register_gate(NULL, "i2c0_clk", "ahb_clk", 0, @@ -747,13 +745,13 @@ void __init spear1310_clk_init(void) &_lock); clk_register_clkdev(clk, "sysram1_clk", NULL); - clk = clk_register_aux("adc_synth_clk", "adc_synth_gate_clk", "ahb_clk", + clk = clk_register_aux("adc_syn_clk", "adc_syn_gclk", "ahb_clk", 0, SPEAR1310_ADC_CLK_SYNT, NULL, adc_rtbl, ARRAY_SIZE(adc_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "adc_synth_clk", NULL); - clk_register_clkdev(clk1, "adc_synth_gate_clk", NULL); + clk_register_clkdev(clk, "adc_syn_clk", NULL); + clk_register_clkdev(clk1, "adc_syn_gclk", NULL); - clk = clk_register_gate(NULL, "adc_clk", "adc_synth_gate_clk", 0, + clk = clk_register_gate(NULL, "adc_clk", "adc_syn_gclk", 0, SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_ADC_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "adc_clk"); @@ -790,37 +788,37 @@ void __init spear1310_clk_init(void) clk_register_clkdev(clk, NULL, "e0300000.kbd"); /* RAS clks */ - clk = clk_register_mux(NULL, "gen_synth0_1_mux_clk", - gen_synth0_1_parents, ARRAY_SIZE(gen_synth0_1_parents), - 0, SPEAR1310_PLL_CFG, SPEAR1310_RAS_SYNT0_1_CLK_SHIFT, + clk = clk_register_mux(NULL, "gen_syn0_1_mclk", gen_synth0_1_parents, + ARRAY_SIZE(gen_synth0_1_parents), 0, SPEAR1310_PLL_CFG, + SPEAR1310_RAS_SYNT0_1_CLK_SHIFT, SPEAR1310_RAS_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gen_synth0_1_clk", NULL); + clk_register_clkdev(clk, "gen_syn0_1_clk", NULL); - clk = clk_register_mux(NULL, "gen_synth2_3_mux_clk", - gen_synth2_3_parents, ARRAY_SIZE(gen_synth2_3_parents), - 0, SPEAR1310_PLL_CFG, SPEAR1310_RAS_SYNT2_3_CLK_SHIFT, + clk = clk_register_mux(NULL, "gen_syn2_3_mclk", gen_synth2_3_parents, + ARRAY_SIZE(gen_synth2_3_parents), 0, SPEAR1310_PLL_CFG, + SPEAR1310_RAS_SYNT2_3_CLK_SHIFT, SPEAR1310_RAS_SYNT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gen_synth2_3_clk", NULL); + clk_register_clkdev(clk, "gen_syn2_3_clk", NULL); - clk = clk_register_frac("gen_synth0_clk", "gen_synth0_1_clk", 0, + clk = clk_register_frac("gen_syn0_clk", "gen_syn0_1_clk", 0, SPEAR1310_RAS_CLK_SYNT0, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth0_clk", NULL); + clk_register_clkdev(clk, "gen_syn0_clk", NULL); - clk = clk_register_frac("gen_synth1_clk", "gen_synth0_1_clk", 0, + clk = clk_register_frac("gen_syn1_clk", "gen_syn0_1_clk", 0, SPEAR1310_RAS_CLK_SYNT1, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth1_clk", NULL); + clk_register_clkdev(clk, "gen_syn1_clk", NULL); - clk = clk_register_frac("gen_synth2_clk", "gen_synth2_3_clk", 0, + clk = clk_register_frac("gen_syn2_clk", "gen_syn2_3_clk", 0, SPEAR1310_RAS_CLK_SYNT2, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth2_clk", NULL); + clk_register_clkdev(clk, "gen_syn2_clk", NULL); - clk = clk_register_frac("gen_synth3_clk", "gen_synth2_3_clk", 0, + clk = clk_register_frac("gen_syn3_clk", "gen_syn2_3_clk", 0, SPEAR1310_RAS_CLK_SYNT3, gen_rtbl, ARRAY_SIZE(gen_rtbl), &_lock); - clk_register_clkdev(clk, "gen_synth3_clk", NULL); + clk_register_clkdev(clk, "gen_syn3_clk", NULL); clk = clk_register_gate(NULL, "ras_osc_24m_clk", "osc_24m_clk", 0, SPEAR1310_RAS_CLK_ENB, SPEAR1310_OSC_24M_CLK_ENB, 0, @@ -847,7 +845,7 @@ void __init spear1310_clk_init(void) &_lock); clk_register_clkdev(clk, "ras_pll3_clk", NULL); - clk = clk_register_gate(NULL, "ras_tx125_clk", "gmii_125m_pad_clk", 0, + clk = clk_register_gate(NULL, "ras_tx125_clk", "gmii_pad_clk", 0, SPEAR1310_RAS_CLK_ENB, SPEAR1310_C125M_PAD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "ras_tx125_clk", NULL); @@ -912,7 +910,7 @@ void __init spear1310_clk_init(void) &_lock); clk_register_clkdev(clk, NULL, "5c700000.eth"); - clk = clk_register_mux(NULL, "smii_rgmii_phy_mux_clk", + clk = clk_register_mux(NULL, "smii_rgmii_phy_mclk", smii_rgmii_phy_parents, ARRAY_SIZE(smii_rgmii_phy_parents), 0, SPEAR1310_RAS_CTRL_REG1, @@ -922,184 +920,184 @@ void __init spear1310_clk_init(void) clk_register_clkdev(clk, NULL, "stmmacphy.2"); clk_register_clkdev(clk, NULL, "stmmacphy.4"); - clk = clk_register_mux(NULL, "rmii_phy_mux_clk", rmii_phy_parents, + clk = clk_register_mux(NULL, "rmii_phy_mclk", rmii_phy_parents, ARRAY_SIZE(rmii_phy_parents), 0, SPEAR1310_RAS_CTRL_REG1, SPEAR1310_RMII_PHY_CLK_SHIFT, SPEAR1310_PHY_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, NULL, "stmmacphy.3"); - clk = clk_register_mux(NULL, "uart1_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart1_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART1_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart1_mux_clk", NULL); + clk_register_clkdev(clk, "uart1_mclk", NULL); - clk = clk_register_gate(NULL, "uart1_clk", "uart1_mux_clk", 0, + clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5c800000.serial"); - clk = clk_register_mux(NULL, "uart2_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart2_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART2_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart2_mux_clk", NULL); + clk_register_clkdev(clk, "uart2_mclk", NULL); - clk = clk_register_gate(NULL, "uart2_clk", "uart2_mux_clk", 0, + clk = clk_register_gate(NULL, "uart2_clk", "uart2_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5c900000.serial"); - clk = clk_register_mux(NULL, "uart3_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart3_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART3_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart3_mux_clk", NULL); + clk_register_clkdev(clk, "uart3_mclk", NULL); - clk = clk_register_gate(NULL, "uart3_clk", "uart3_mux_clk", 0, + clk = clk_register_gate(NULL, "uart3_clk", "uart3_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5ca00000.serial"); - clk = clk_register_mux(NULL, "uart4_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart4_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART4_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart4_mux_clk", NULL); + clk_register_clkdev(clk, "uart4_mclk", NULL); - clk = clk_register_gate(NULL, "uart4_clk", "uart4_mux_clk", 0, + clk = clk_register_gate(NULL, "uart4_clk", "uart4_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART4_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5cb00000.serial"); - clk = clk_register_mux(NULL, "uart5_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart5_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART5_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart5_mux_clk", NULL); + clk_register_clkdev(clk, "uart5_mclk", NULL); - clk = clk_register_gate(NULL, "uart5_clk", "uart5_mux_clk", 0, + clk = clk_register_gate(NULL, "uart5_clk", "uart5_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_UART5_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5cc00000.serial"); - clk = clk_register_mux(NULL, "i2c1_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c1_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C1_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c1_mux_clk", NULL); + clk_register_clkdev(clk, "i2c1_mclk", NULL); - clk = clk_register_gate(NULL, "i2c1_clk", "i2c1_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c1_clk", "i2c1_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5cd00000.i2c"); - clk = clk_register_mux(NULL, "i2c2_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c2_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C2_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c2_mux_clk", NULL); + clk_register_clkdev(clk, "i2c2_mclk", NULL); - clk = clk_register_gate(NULL, "i2c2_clk", "i2c2_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c2_clk", "i2c2_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5ce00000.i2c"); - clk = clk_register_mux(NULL, "i2c3_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c3_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C3_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c3_mux_clk", NULL); + clk_register_clkdev(clk, "i2c3_mclk", NULL); - clk = clk_register_gate(NULL, "i2c3_clk", "i2c3_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c3_clk", "i2c3_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5cf00000.i2c"); - clk = clk_register_mux(NULL, "i2c4_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c4_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C4_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c4_mux_clk", NULL); + clk_register_clkdev(clk, "i2c4_mclk", NULL); - clk = clk_register_gate(NULL, "i2c4_clk", "i2c4_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c4_clk", "i2c4_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C4_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5d000000.i2c"); - clk = clk_register_mux(NULL, "i2c5_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c5_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C5_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c5_mux_clk", NULL); + clk_register_clkdev(clk, "i2c5_mclk", NULL); - clk = clk_register_gate(NULL, "i2c5_clk", "i2c5_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c5_clk", "i2c5_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C5_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5d100000.i2c"); - clk = clk_register_mux(NULL, "i2c6_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c6_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C6_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c6_mux_clk", NULL); + clk_register_clkdev(clk, "i2c6_mclk", NULL); - clk = clk_register_gate(NULL, "i2c6_clk", "i2c6_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c6_clk", "i2c6_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C6_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5d200000.i2c"); - clk = clk_register_mux(NULL, "i2c7_mux_clk", i2c_parents, + clk = clk_register_mux(NULL, "i2c7_mclk", i2c_parents, ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C7_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "i2c7_mux_clk", NULL); + clk_register_clkdev(clk, "i2c7_mclk", NULL); - clk = clk_register_gate(NULL, "i2c7_clk", "i2c7_mux_clk", 0, + clk = clk_register_gate(NULL, "i2c7_clk", "i2c7_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_I2C7_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5d300000.i2c"); - clk = clk_register_mux(NULL, "ssp1_mux_clk", ssp1_parents, + clk = clk_register_mux(NULL, "ssp1_mclk", ssp1_parents, ARRAY_SIZE(ssp1_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_SSP1_CLK_SHIFT, SPEAR1310_SSP1_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "ssp1_mux_clk", NULL); + clk_register_clkdev(clk, "ssp1_mclk", NULL); - clk = clk_register_gate(NULL, "ssp1_clk", "ssp1_mux_clk", 0, + clk = clk_register_gate(NULL, "ssp1_clk", "ssp1_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_SSP1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "5d400000.spi"); - clk = clk_register_mux(NULL, "pci_mux_clk", pci_parents, + clk = clk_register_mux(NULL, "pci_mclk", pci_parents, ARRAY_SIZE(pci_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_PCI_CLK_SHIFT, SPEAR1310_PCI_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "pci_mux_clk", NULL); + clk_register_clkdev(clk, "pci_mclk", NULL); - clk = clk_register_gate(NULL, "pci_clk", "pci_mux_clk", 0, + clk = clk_register_gate(NULL, "pci_clk", "pci_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_PCI_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "pci"); - clk = clk_register_mux(NULL, "tdm1_mux_clk", tdm_parents, + clk = clk_register_mux(NULL, "tdm1_mclk", tdm_parents, ARRAY_SIZE(tdm_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_TDM1_CLK_SHIFT, SPEAR1310_TDM_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "tdm1_mux_clk", NULL); + clk_register_clkdev(clk, "tdm1_mclk", NULL); - clk = clk_register_gate(NULL, "tdm1_clk", "tdm1_mux_clk", 0, + clk = clk_register_gate(NULL, "tdm1_clk", "tdm1_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_TDM1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "tdm_hdlc.0"); - clk = clk_register_mux(NULL, "tdm2_mux_clk", tdm_parents, + clk = clk_register_mux(NULL, "tdm2_mclk", tdm_parents, ARRAY_SIZE(tdm_parents), 0, SPEAR1310_RAS_CTRL_REG0, SPEAR1310_TDM2_CLK_SHIFT, SPEAR1310_TDM_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "tdm2_mux_clk", NULL); + clk_register_clkdev(clk, "tdm2_mclk", NULL); - clk = clk_register_gate(NULL, "tdm2_clk", "tdm2_mux_clk", 0, + clk = clk_register_gate(NULL, "tdm2_clk", "tdm2_mclk", 0, SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_TDM2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "tdm_hdlc.1"); -- cgit v1.2.3 From 5cfc545f50c4b6c0800e578b51019f2ecf490f1e Mon Sep 17 00:00:00 2001 From: Vipul Kumar Samar Date: Tue, 10 Jul 2012 17:12:45 +0530 Subject: Clk:spear3xx:Fix: Rename clk ids within predefined limit The max limit of con_id is 16 and dev_id is 20. As of now for spear3xx, many clk ids are exceeding this predefined limit. This patch is intended to rename clk ids like: mux_clk -> _mclk gate_clk -> _gclk synth_clk -> syn_clk ras_gen1_synth_gate_clk -> ras_syn1_gclk ras_pll3_48m -> ras_pll3_ pll3_48m -> pll3_ Signed-off-by: Vipul Kumar Samar Signed-off-by: Shiraz Hashim Acked-by: Viresh Kumar Acked-by: Arnd Bergmann --- arch/arm/mach-spear3xx/spear3xx.c | 2 +- drivers/clk/spear/spear3xx_clock.c | 180 ++++++++++++++++++------------------- 2 files changed, 87 insertions(+), 95 deletions(-) diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c index 0f41bd1c47c3..66db5f13af84 100644 --- a/arch/arm/mach-spear3xx/spear3xx.c +++ b/arch/arm/mach-spear3xx/spear3xx.c @@ -87,7 +87,7 @@ void __init spear3xx_map_io(void) static void __init spear3xx_timer_init(void) { - char pclk_name[] = "pll3_48m_clk"; + char pclk_name[] = "pll3_clk"; struct clk *gpt_clk, *pclk; spear3xx_clk_init(); diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c index 01dd6daff2a1..c3157454bb3f 100644 --- a/drivers/clk/spear/spear3xx_clock.c +++ b/drivers/clk/spear/spear3xx_clock.c @@ -122,12 +122,12 @@ static struct gpt_rate_tbl gpt_rtbl[] = { }; /* clock parents */ -static const char *uart0_parents[] = { "pll3_48m_clk", "uart_synth_gate_clk", }; -static const char *firda_parents[] = { "pll3_48m_clk", "firda_synth_gate_clk", +static const char *uart0_parents[] = { "pll3_clk", "uart_syn_gclk", }; +static const char *firda_parents[] = { "pll3_clk", "firda_syn_gclk", }; -static const char *gpt0_parents[] = { "pll3_48m_clk", "gpt0_synth_clk", }; -static const char *gpt1_parents[] = { "pll3_48m_clk", "gpt1_synth_clk", }; -static const char *gpt2_parents[] = { "pll3_48m_clk", "gpt2_synth_clk", }; +static const char *gpt0_parents[] = { "pll3_clk", "gpt0_syn_clk", }; +static const char *gpt1_parents[] = { "pll3_clk", "gpt1_syn_clk", }; +static const char *gpt2_parents[] = { "pll3_clk", "gpt2_syn_clk", }; static const char *gen2_3_parents[] = { "pll1_clk", "pll2_clk", }; static const char *ddr_parents[] = { "ahb_clk", "ahbmult2_clk", "none", "pll2_clk", }; @@ -137,7 +137,7 @@ static void __init spear300_clk_init(void) { struct clk *clk; - clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_48m_clk", 0, + clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_clk", 0, 1, 1); clk_register_clkdev(clk, NULL, "60000000.clcd"); @@ -219,15 +219,11 @@ static void __init spear310_clk_init(void) #define SPEAR320_UARTX_PCLK_VAL_SYNTH1 0x0 #define SPEAR320_UARTX_PCLK_VAL_APB 0x1 -static const char *i2s_ref_parents[] = { "ras_pll2_clk", - "ras_gen2_synth_gate_clk", }; -static const char *sdhci_parents[] = { "ras_pll3_48m_clk", - "ras_gen3_synth_gate_clk", -}; +static const char *i2s_ref_parents[] = { "ras_pll2_clk", "ras_syn2_gclk", }; +static const char *sdhci_parents[] = { "ras_pll3_clk", "ras_syn3_gclk", }; static const char *smii0_parents[] = { "smii_125m_pad", "ras_pll2_clk", - "ras_gen0_synth_gate_clk", }; -static const char *uartx_parents[] = { "ras_gen1_synth_gate_clk", "ras_apb_clk", -}; + "ras_syn0_gclk", }; +static const char *uartx_parents[] = { "ras_syn1_gclk", "ras_apb_clk", }; static void __init spear320_clk_init(void) { @@ -237,7 +233,7 @@ static void __init spear320_clk_init(void) CLK_IS_ROOT, 125000000); clk_register_clkdev(clk, "smii_125m_pad", NULL); - clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_48m_clk", 0, + clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_clk", 0, 1, 1); clk_register_clkdev(clk, NULL, "90000000.clcd"); @@ -363,9 +359,9 @@ void __init spear3xx_clk_init(void) clk_register_clkdev(clk, NULL, "fc900000.rtc"); /* clock derived from 24 MHz osc clk */ - clk = clk_register_fixed_rate(NULL, "pll3_48m_clk", "osc_24m_clk", 0, + clk = clk_register_fixed_rate(NULL, "pll3_clk", "osc_24m_clk", 0, 48000000); - clk_register_clkdev(clk, "pll3_48m_clk", NULL); + clk_register_clkdev(clk, "pll3_clk", NULL); clk = clk_register_fixed_factor(NULL, "wdt_clk", "osc_24m_clk", 0, 1, 1); @@ -392,98 +388,98 @@ void __init spear3xx_clk_init(void) HCLK_RATIO_MASK, 0, &_lock); clk_register_clkdev(clk, "ahb_clk", NULL); - clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk", - "pll1_clk", 0, UART_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "uart_synth_clk", NULL); - clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL); + clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "pll1_clk", 0, + UART_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "uart_syn_clk", NULL); + clk_register_clkdev(clk1, "uart_syn_gclk", NULL); - clk = clk_register_mux(NULL, "uart0_mux_clk", uart0_parents, + clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents, ARRAY_SIZE(uart0_parents), 0, PERIP_CLK_CFG, UART_CLK_SHIFT, UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart0_mux_clk", NULL); + clk_register_clkdev(clk, "uart0_mclk", NULL); - clk = clk_register_gate(NULL, "uart0", "uart0_mux_clk", 0, - PERIP1_CLK_ENB, UART_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "uart0", "uart0_mclk", 0, PERIP1_CLK_ENB, + UART_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "d0000000.serial"); - clk = clk_register_aux("firda_synth_clk", "firda_synth_gate_clk", - "pll1_clk", 0, FIRDA_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "firda_synth_clk", NULL); - clk_register_clkdev(clk1, "firda_synth_gate_clk", NULL); + clk = clk_register_aux("firda_syn_clk", "firda_syn_gclk", "pll1_clk", 0, + FIRDA_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "firda_syn_clk", NULL); + clk_register_clkdev(clk1, "firda_syn_gclk", NULL); - clk = clk_register_mux(NULL, "firda_mux_clk", firda_parents, + clk = clk_register_mux(NULL, "firda_mclk", firda_parents, ARRAY_SIZE(firda_parents), 0, PERIP_CLK_CFG, FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "firda_mux_clk", NULL); + clk_register_clkdev(clk, "firda_mclk", NULL); - clk = clk_register_gate(NULL, "firda_clk", "firda_mux_clk", 0, + clk = clk_register_gate(NULL, "firda_clk", "firda_mclk", 0, PERIP1_CLK_ENB, FIRDA_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "firda"); /* gpt clocks */ - clk_register_gpt("gpt0_synth_clk", "pll1_clk", 0, PRSC0_CLK_CFG, - gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); + clk_register_gpt("gpt0_syn_clk", "pll1_clk", 0, PRSC0_CLK_CFG, gpt_rtbl, + ARRAY_SIZE(gpt_rtbl), &_lock); clk = clk_register_mux(NULL, "gpt0_clk", gpt0_parents, ARRAY_SIZE(gpt0_parents), 0, PERIP_CLK_CFG, GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt0"); - clk_register_gpt("gpt1_synth_clk", "pll1_clk", 0, PRSC1_CLK_CFG, - gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); - clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt1_parents, + clk_register_gpt("gpt1_syn_clk", "pll1_clk", 0, PRSC1_CLK_CFG, gpt_rtbl, + ARRAY_SIZE(gpt_rtbl), &_lock); + clk = clk_register_mux(NULL, "gpt1_mclk", gpt1_parents, ARRAY_SIZE(gpt1_parents), 0, PERIP_CLK_CFG, GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt1_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, + clk_register_clkdev(clk, "gpt1_mclk", NULL); + clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0, PERIP1_CLK_ENB, GPT1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt1"); - clk_register_gpt("gpt2_synth_clk", "pll1_clk", 0, PRSC2_CLK_CFG, - gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); - clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt2_parents, + clk_register_gpt("gpt2_syn_clk", "pll1_clk", 0, PRSC2_CLK_CFG, gpt_rtbl, + ARRAY_SIZE(gpt_rtbl), &_lock); + clk = clk_register_mux(NULL, "gpt2_mclk", gpt2_parents, ARRAY_SIZE(gpt2_parents), 0, PERIP_CLK_CFG, GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt2_mux_clk", NULL); - clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, + clk_register_clkdev(clk, "gpt2_mclk", NULL); + clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0, PERIP1_CLK_ENB, GPT2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt2"); /* general synths clocks */ - clk = clk_register_aux("gen0_synth_clk", "gen0_synth_gate_clk", - "pll1_clk", 0, GEN0_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gen0_synth_clk", NULL); - clk_register_clkdev(clk1, "gen0_synth_gate_clk", NULL); - - clk = clk_register_aux("gen1_synth_clk", "gen1_synth_gate_clk", - "pll1_clk", 0, GEN1_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gen1_synth_clk", NULL); - clk_register_clkdev(clk1, "gen1_synth_gate_clk", NULL); - - clk = clk_register_mux(NULL, "gen2_3_parent_clk", gen2_3_parents, + clk = clk_register_aux("gen0_syn_clk", "gen0_syn_gclk", "pll1_clk", + 0, GEN0_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "gen0_syn_clk", NULL); + clk_register_clkdev(clk1, "gen0_syn_gclk", NULL); + + clk = clk_register_aux("gen1_syn_clk", "gen1_syn_gclk", "pll1_clk", + 0, GEN1_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "gen1_syn_clk", NULL); + clk_register_clkdev(clk1, "gen1_syn_gclk", NULL); + + clk = clk_register_mux(NULL, "gen2_3_par_clk", gen2_3_parents, ARRAY_SIZE(gen2_3_parents), 0, CORE_CLK_CFG, GEN_SYNTH2_3_CLK_SHIFT, GEN_SYNTH2_3_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gen2_3_parent_clk", NULL); + clk_register_clkdev(clk, "gen2_3_par_clk", NULL); - clk = clk_register_aux("gen2_synth_clk", "gen2_synth_gate_clk", - "gen2_3_parent_clk", 0, GEN2_CLK_SYNT, NULL, aux_rtbl, + clk = clk_register_aux("gen2_syn_clk", "gen2_syn_gclk", + "gen2_3_par_clk", 0, GEN2_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gen2_synth_clk", NULL); - clk_register_clkdev(clk1, "gen2_synth_gate_clk", NULL); + clk_register_clkdev(clk, "gen2_syn_clk", NULL); + clk_register_clkdev(clk1, "gen2_syn_gclk", NULL); - clk = clk_register_aux("gen3_synth_clk", "gen3_synth_gate_clk", - "gen2_3_parent_clk", 0, GEN3_CLK_SYNT, NULL, aux_rtbl, + clk = clk_register_aux("gen3_syn_clk", "gen3_syn_gclk", + "gen2_3_par_clk", 0, GEN3_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "gen3_synth_clk", NULL); - clk_register_clkdev(clk1, "gen3_synth_gate_clk", NULL); + clk_register_clkdev(clk, "gen3_syn_clk", NULL); + clk_register_clkdev(clk1, "gen3_syn_gclk", NULL); /* clock derived from pll3 clk */ - clk = clk_register_gate(NULL, "usbh_clk", "pll3_48m_clk", 0, - PERIP1_CLK_ENB, USBH_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "usbh_clk", "pll3_clk", 0, PERIP1_CLK_ENB, + USBH_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "usbh_clk", NULL); clk = clk_register_fixed_factor(NULL, "usbh.0_clk", "usbh_clk", 0, 1, @@ -494,8 +490,8 @@ void __init spear3xx_clk_init(void) 1); clk_register_clkdev(clk, "usbh.1_clk", NULL); - clk = clk_register_gate(NULL, "usbd_clk", "pll3_48m_clk", 0, - PERIP1_CLK_ENB, USBD_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "usbd_clk", "pll3_clk", 0, PERIP1_CLK_ENB, + USBD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "designware_udc"); /* clock derived from ahb clk */ @@ -579,29 +575,25 @@ void __init spear3xx_clk_init(void) RAS_CLK_ENB, RAS_PLL2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, "ras_pll2_clk", NULL); - clk = clk_register_gate(NULL, "ras_pll3_48m_clk", "pll3_48m_clk", 0, + clk = clk_register_gate(NULL, "ras_pll3_clk", "pll3_clk", 0, RAS_CLK_ENB, RAS_48M_CLK_ENB, 0, &_lock); - clk_register_clkdev(clk, "ras_pll3_48m_clk", NULL); - - clk = clk_register_gate(NULL, "ras_gen0_synth_gate_clk", - "gen0_synth_gate_clk", 0, RAS_CLK_ENB, - RAS_SYNT0_CLK_ENB, 0, &_lock); - clk_register_clkdev(clk, "ras_gen0_synth_gate_clk", NULL); - - clk = clk_register_gate(NULL, "ras_gen1_synth_gate_clk", - "gen1_synth_gate_clk", 0, RAS_CLK_ENB, - RAS_SYNT1_CLK_ENB, 0, &_lock); - clk_register_clkdev(clk, "ras_gen1_synth_gate_clk", NULL); - - clk = clk_register_gate(NULL, "ras_gen2_synth_gate_clk", - "gen2_synth_gate_clk", 0, RAS_CLK_ENB, - RAS_SYNT2_CLK_ENB, 0, &_lock); - clk_register_clkdev(clk, "ras_gen2_synth_gate_clk", NULL); - - clk = clk_register_gate(NULL, "ras_gen3_synth_gate_clk", - "gen3_synth_gate_clk", 0, RAS_CLK_ENB, - RAS_SYNT3_CLK_ENB, 0, &_lock); - clk_register_clkdev(clk, "ras_gen3_synth_gate_clk", NULL); + clk_register_clkdev(clk, "ras_pll3_clk", NULL); + + clk = clk_register_gate(NULL, "ras_syn0_gclk", "gen0_syn_gclk", 0, + RAS_CLK_ENB, RAS_SYNT0_CLK_ENB, 0, &_lock); + clk_register_clkdev(clk, "ras_syn0_gclk", NULL); + + clk = clk_register_gate(NULL, "ras_syn1_gclk", "gen1_syn_gclk", 0, + RAS_CLK_ENB, RAS_SYNT1_CLK_ENB, 0, &_lock); + clk_register_clkdev(clk, "ras_syn1_gclk", NULL); + + clk = clk_register_gate(NULL, "ras_syn2_gclk", "gen2_syn_gclk", 0, + RAS_CLK_ENB, RAS_SYNT2_CLK_ENB, 0, &_lock); + clk_register_clkdev(clk, "ras_syn2_gclk", NULL); + + clk = clk_register_gate(NULL, "ras_syn3_gclk", "gen3_syn_gclk", 0, + RAS_CLK_ENB, RAS_SYNT3_CLK_ENB, 0, &_lock); + clk_register_clkdev(clk, "ras_syn3_gclk", NULL); if (of_machine_is_compatible("st,spear300")) spear300_clk_init(); -- cgit v1.2.3 From a8f4bf0eb4ca7a0a578079fb5807e59b7111a1e2 Mon Sep 17 00:00:00 2001 From: Vipul Kumar Samar Date: Tue, 10 Jul 2012 17:12:46 +0530 Subject: Clk:spear6xx:Fix: Rename clk ids within predefined limit The max limit of con_id is 16 and dev_id is 20. As of now for spear6xx, many clk ids are exceeding this predefined limit. This patch is intended to rename clk ids like: mux_clk -> _mclk gate_clk -> _gclk synth_clk -> syn_clk ras_gen1_synth_gate_clk -> ras_syn1_gclk pll3_48m -> pll3_ Signed-off-by: Vipul Kumar Samar Signed-off-by: Shiraz Hashim Acked-by: Viresh Kumar Acked-by: Arnd Bergmann --- arch/arm/mach-spear6xx/spear6xx.c | 2 +- drivers/clk/spear/spear6xx_clock.c | 122 ++++++++++++++++++------------------- 2 files changed, 61 insertions(+), 63 deletions(-) diff --git a/arch/arm/mach-spear6xx/spear6xx.c b/arch/arm/mach-spear6xx/spear6xx.c index 2e2e3596583e..9af67d003c62 100644 --- a/arch/arm/mach-spear6xx/spear6xx.c +++ b/arch/arm/mach-spear6xx/spear6xx.c @@ -423,7 +423,7 @@ void __init spear6xx_map_io(void) static void __init spear6xx_timer_init(void) { - char pclk_name[] = "pll3_48m_clk"; + char pclk_name[] = "pll3_clk"; struct clk *gpt_clk, *pclk; spear6xx_clk_init(); diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c index 61026ae564ab..a98d0866f541 100644 --- a/drivers/clk/spear/spear6xx_clock.c +++ b/drivers/clk/spear/spear6xx_clock.c @@ -97,13 +97,12 @@ static struct aux_rate_tbl aux_rtbl[] = { {.xscale = 1, .yscale = 2, .eq = 1}, /* 166 MHz */ }; -static const char *clcd_parents[] = { "pll3_48m_clk", "clcd_synth_gate_clk", }; -static const char *firda_parents[] = { "pll3_48m_clk", "firda_synth_gate_clk", -}; -static const char *uart_parents[] = { "pll3_48m_clk", "uart_synth_gate_clk", }; -static const char *gpt0_1_parents[] = { "pll3_48m_clk", "gpt0_1_synth_clk", }; -static const char *gpt2_parents[] = { "pll3_48m_clk", "gpt2_synth_clk", }; -static const char *gpt3_parents[] = { "pll3_48m_clk", "gpt3_synth_clk", }; +static const char *clcd_parents[] = { "pll3_clk", "clcd_syn_gclk", }; +static const char *firda_parents[] = { "pll3_clk", "firda_syn_gclk", }; +static const char *uart_parents[] = { "pll3_clk", "uart_syn_gclk", }; +static const char *gpt0_1_parents[] = { "pll3_clk", "gpt0_1_syn_clk", }; +static const char *gpt2_parents[] = { "pll3_clk", "gpt2_syn_clk", }; +static const char *gpt3_parents[] = { "pll3_clk", "gpt3_syn_clk", }; static const char *ddr_parents[] = { "ahb_clk", "ahbmult2_clk", "none", "pll2_clk", }; @@ -136,9 +135,9 @@ void __init spear6xx_clk_init(void) clk_register_clkdev(clk, NULL, "rtc-spear"); /* clock derived from 30 MHz osc clk */ - clk = clk_register_fixed_rate(NULL, "pll3_48m_clk", "osc_24m_clk", 0, + clk = clk_register_fixed_rate(NULL, "pll3_clk", "osc_24m_clk", 0, 48000000); - clk_register_clkdev(clk, "pll3_48m_clk", NULL); + clk_register_clkdev(clk, "pll3_clk", NULL); clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "osc_30m_clk", 0, PLL1_CTR, PLL1_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), @@ -146,9 +145,9 @@ void __init spear6xx_clk_init(void) clk_register_clkdev(clk, "vco1_clk", NULL); clk_register_clkdev(clk1, "pll1_clk", NULL); - clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, - "osc_30m_clk", 0, PLL2_CTR, PLL2_FRQ, pll_rtbl, - ARRAY_SIZE(pll_rtbl), &_lock, &clk1, NULL); + clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "osc_30m_clk", + 0, PLL2_CTR, PLL2_FRQ, pll_rtbl, ARRAY_SIZE(pll_rtbl), + &_lock, &clk1, NULL); clk_register_clkdev(clk, "vco2_clk", NULL); clk_register_clkdev(clk1, "pll2_clk", NULL); @@ -165,111 +164,111 @@ void __init spear6xx_clk_init(void) HCLK_RATIO_MASK, 0, &_lock); clk_register_clkdev(clk, "ahb_clk", NULL); - clk = clk_register_aux("uart_synth_clk", "uart_synth_gate_clk", - "pll1_clk", 0, UART_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "uart_synth_clk", NULL); - clk_register_clkdev(clk1, "uart_synth_gate_clk", NULL); + clk = clk_register_aux("uart_syn_clk", "uart_syn_gclk", "pll1_clk", 0, + UART_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "uart_syn_clk", NULL); + clk_register_clkdev(clk1, "uart_syn_gclk", NULL); - clk = clk_register_mux(NULL, "uart_mux_clk", uart_parents, + clk = clk_register_mux(NULL, "uart_mclk", uart_parents, ARRAY_SIZE(uart_parents), 0, PERIP_CLK_CFG, UART_CLK_SHIFT, UART_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "uart_mux_clk", NULL); + clk_register_clkdev(clk, "uart_mclk", NULL); - clk = clk_register_gate(NULL, "uart0", "uart_mux_clk", 0, - PERIP1_CLK_ENB, UART0_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "uart0", "uart_mclk", 0, PERIP1_CLK_ENB, + UART0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "d0000000.serial"); - clk = clk_register_gate(NULL, "uart1", "uart_mux_clk", 0, - PERIP1_CLK_ENB, UART1_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "uart1", "uart_mclk", 0, PERIP1_CLK_ENB, + UART1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "d0080000.serial"); - clk = clk_register_aux("firda_synth_clk", "firda_synth_gate_clk", - "pll1_clk", 0, FIRDA_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "firda_synth_clk", NULL); - clk_register_clkdev(clk1, "firda_synth_gate_clk", NULL); + clk = clk_register_aux("firda_syn_clk", "firda_syn_gclk", "pll1_clk", + 0, FIRDA_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "firda_syn_clk", NULL); + clk_register_clkdev(clk1, "firda_syn_gclk", NULL); - clk = clk_register_mux(NULL, "firda_mux_clk", firda_parents, + clk = clk_register_mux(NULL, "firda_mclk", firda_parents, ARRAY_SIZE(firda_parents), 0, PERIP_CLK_CFG, FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "firda_mux_clk", NULL); + clk_register_clkdev(clk, "firda_mclk", NULL); - clk = clk_register_gate(NULL, "firda_clk", "firda_mux_clk", 0, + clk = clk_register_gate(NULL, "firda_clk", "firda_mclk", 0, PERIP1_CLK_ENB, FIRDA_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "firda"); - clk = clk_register_aux("clcd_synth_clk", "clcd_synth_gate_clk", - "pll1_clk", 0, CLCD_CLK_SYNT, NULL, aux_rtbl, - ARRAY_SIZE(aux_rtbl), &_lock, &clk1); - clk_register_clkdev(clk, "clcd_synth_clk", NULL); - clk_register_clkdev(clk1, "clcd_synth_gate_clk", NULL); + clk = clk_register_aux("clcd_syn_clk", "clcd_syn_gclk", "pll1_clk", + 0, CLCD_CLK_SYNT, NULL, aux_rtbl, ARRAY_SIZE(aux_rtbl), + &_lock, &clk1); + clk_register_clkdev(clk, "clcd_syn_clk", NULL); + clk_register_clkdev(clk1, "clcd_syn_gclk", NULL); - clk = clk_register_mux(NULL, "clcd_mux_clk", clcd_parents, + clk = clk_register_mux(NULL, "clcd_mclk", clcd_parents, ARRAY_SIZE(clcd_parents), 0, PERIP_CLK_CFG, CLCD_CLK_SHIFT, CLCD_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "clcd_mux_clk", NULL); + clk_register_clkdev(clk, "clcd_mclk", NULL); - clk = clk_register_gate(NULL, "clcd_clk", "clcd_mux_clk", 0, + clk = clk_register_gate(NULL, "clcd_clk", "clcd_mclk", 0, PERIP1_CLK_ENB, CLCD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "clcd"); /* gpt clocks */ - clk = clk_register_gpt("gpt0_1_synth_clk", "pll1_clk", 0, PRSC0_CLK_CFG, + clk = clk_register_gpt("gpt0_1_syn_clk", "pll1_clk", 0, PRSC0_CLK_CFG, gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); - clk_register_clkdev(clk, "gpt0_1_synth_clk", NULL); + clk_register_clkdev(clk, "gpt0_1_syn_clk", NULL); - clk = clk_register_mux(NULL, "gpt0_mux_clk", gpt0_1_parents, + clk = clk_register_mux(NULL, "gpt0_mclk", gpt0_1_parents, ARRAY_SIZE(gpt0_1_parents), 0, PERIP_CLK_CFG, GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt0"); - clk = clk_register_mux(NULL, "gpt1_mux_clk", gpt0_1_parents, + clk = clk_register_mux(NULL, "gpt1_mclk", gpt0_1_parents, ARRAY_SIZE(gpt0_1_parents), 0, PERIP_CLK_CFG, GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt1_mux_clk", NULL); + clk_register_clkdev(clk, "gpt1_mclk", NULL); - clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mux_clk", 0, + clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0, PERIP1_CLK_ENB, GPT1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt1"); - clk = clk_register_gpt("gpt2_synth_clk", "pll1_clk", 0, PRSC1_CLK_CFG, + clk = clk_register_gpt("gpt2_syn_clk", "pll1_clk", 0, PRSC1_CLK_CFG, gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); - clk_register_clkdev(clk, "gpt2_synth_clk", NULL); + clk_register_clkdev(clk, "gpt2_syn_clk", NULL); - clk = clk_register_mux(NULL, "gpt2_mux_clk", gpt2_parents, + clk = clk_register_mux(NULL, "gpt2_mclk", gpt2_parents, ARRAY_SIZE(gpt2_parents), 0, PERIP_CLK_CFG, GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt2_mux_clk", NULL); + clk_register_clkdev(clk, "gpt2_mclk", NULL); - clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mux_clk", 0, + clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0, PERIP1_CLK_ENB, GPT2_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt2"); - clk = clk_register_gpt("gpt3_synth_clk", "pll1_clk", 0, PRSC2_CLK_CFG, + clk = clk_register_gpt("gpt3_syn_clk", "pll1_clk", 0, PRSC2_CLK_CFG, gpt_rtbl, ARRAY_SIZE(gpt_rtbl), &_lock); - clk_register_clkdev(clk, "gpt3_synth_clk", NULL); + clk_register_clkdev(clk, "gpt3_syn_clk", NULL); - clk = clk_register_mux(NULL, "gpt3_mux_clk", gpt3_parents, + clk = clk_register_mux(NULL, "gpt3_mclk", gpt3_parents, ARRAY_SIZE(gpt3_parents), 0, PERIP_CLK_CFG, GPT3_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock); - clk_register_clkdev(clk, "gpt3_mux_clk", NULL); + clk_register_clkdev(clk, "gpt3_mclk", NULL); - clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mux_clk", 0, + clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0, PERIP1_CLK_ENB, GPT3_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "gpt3"); /* clock derived from pll3 clk */ - clk = clk_register_gate(NULL, "usbh0_clk", "pll3_48m_clk", 0, + clk = clk_register_gate(NULL, "usbh0_clk", "pll3_clk", 0, PERIP1_CLK_ENB, USBH0_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "usbh.0_clk"); - clk = clk_register_gate(NULL, "usbh1_clk", "pll3_48m_clk", 0, + clk = clk_register_gate(NULL, "usbh1_clk", "pll3_clk", 0, PERIP1_CLK_ENB, USBH1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "usbh.1_clk"); - clk = clk_register_gate(NULL, "usbd_clk", "pll3_48m_clk", 0, - PERIP1_CLK_ENB, USBD_CLK_ENB, 0, &_lock); + clk = clk_register_gate(NULL, "usbd_clk", "pll3_clk", 0, PERIP1_CLK_ENB, + USBD_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "designware_udc"); /* clock derived from ahb clk */ @@ -278,9 +277,8 @@ void __init spear6xx_clk_init(void) clk_register_clkdev(clk, "ahbmult2_clk", NULL); clk = clk_register_mux(NULL, "ddr_clk", ddr_parents, - ARRAY_SIZE(ddr_parents), - 0, PLL_CLK_CFG, MCTR_CLK_SHIFT, MCTR_CLK_MASK, 0, - &_lock); + ARRAY_SIZE(ddr_parents), 0, PLL_CLK_CFG, MCTR_CLK_SHIFT, + MCTR_CLK_MASK, 0, &_lock); clk_register_clkdev(clk, "ddr_clk", NULL); clk = clk_register_divider(NULL, "apb_clk", "ahb_clk", -- cgit v1.2.3 From 465e4f2b19159533e08042f8a113769512385195 Mon Sep 17 00:00:00 2001 From: Vipul Kumar Samar Date: Wed, 4 Jul 2012 18:52:17 +0800 Subject: ARM: SPEAr13xx: Fix Interrupt bindings - Correct interrupt bindings for uart, ethernet and pmu. - Added interrupt binding for keyboard. Signed-off-by: Vipul Kumar Samar Signed-off-by: Shiraz Hashim Acked-by: Viresh Kumar --- arch/arm/boot/dts/spear13xx.dtsi | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index 10dcec7e7321..f7b84aced654 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -43,8 +43,8 @@ pmu { compatible = "arm,cortex-a9-pmu"; - interrupts = <0 8 0x04 - 0 9 0x04>; + interrupts = <0 6 0x04 + 0 7 0x04>; }; L2: l2-cache { @@ -119,8 +119,8 @@ gmac0: eth@e2000000 { compatible = "st,spear600-gmac"; reg = <0xe2000000 0x8000>; - interrupts = <0 23 0x4 - 0 24 0x4>; + interrupts = <0 33 0x4 + 0 34 0x4>; interrupt-names = "macirq", "eth_wake_irq"; status = "disabled"; }; @@ -202,6 +202,7 @@ kbd@e0300000 { compatible = "st,spear300-kbd"; reg = <0xe0300000 0x1000>; + interrupts = <0 52 0x4>; status = "disabled"; }; @@ -224,7 +225,7 @@ serial@e0000000 { compatible = "arm,pl011", "arm,primecell"; reg = <0xe0000000 0x1000>; - interrupts = <0 36 0x4>; + interrupts = <0 35 0x4>; status = "disabled"; }; -- cgit v1.2.3 From d9ba8db2157654e2fc159a63c4b6eb8cffb352ae Mon Sep 17 00:00:00 2001 From: Vipul Kumar Samar Date: Wed, 4 Jul 2012 18:52:19 +0800 Subject: clk: SPEAr1340: Fix clk enable register for uart1 and i2c1. This patch is to fix typing mistake of clk enable register of i2c1 and uart1. Signed-off-by: Vipul Kumar Samar Signed-off-by: Shiraz Hashim Acked-by: Viresh Kumar --- drivers/clk/spear/spear1340_clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c index 0f2324b9321b..44846987ee64 100644 --- a/drivers/clk/spear/spear1340_clock.c +++ b/drivers/clk/spear/spear1340_clock.c @@ -617,7 +617,7 @@ void __init spear1340_clk_init(void) clk_register_clkdev(clk, "uart1_mclk", NULL); clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0, - SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_UART1_CLK_ENB, 0, + SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_UART1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b4100000.serial"); @@ -741,7 +741,7 @@ void __init spear1340_clk_init(void) clk_register_clkdev(clk, NULL, "e0280000.i2c"); clk = clk_register_gate(NULL, "i2c1_clk", "ahb_clk", 0, - SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_I2C1_CLK_ENB, 0, + SPEAR1340_PERIP3_CLK_ENB, SPEAR1340_I2C1_CLK_ENB, 0, &_lock); clk_register_clkdev(clk, NULL, "b4000000.i2c"); -- cgit v1.2.3 From d4f513ff12c1d74b379715e78c01002f5d055315 Mon Sep 17 00:00:00 2001 From: Vipul Kumar Samar Date: Fri, 6 Jul 2012 15:52:36 +0530 Subject: Clk: SPEAr1340: Update sys clock parent array sys_clk has multiple parents and selection of parent depends on sys_clk_ctrl register bit no. 23:25, with following possibilities 0XX: pll1_clk 10X: sys_synth_clk 110: pll2_clk 111: pll3_clk Out of several possibilities (h/w wise) to select same clock parent for sys_clk, current clock implementation was considering just one value. When bootloader programmed different (valid) value to select a clock parent then Linux breaks. Here, we try to include all possibilities which can lead to same clock selection thus making Linux independent of bootloader selection values. Signed-off-by: Vipul Kumar Samar Signed-off-by: Shiraz Hashim Acked-by: Viresh Kumar --- drivers/clk/spear/spear1340_clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c index 44846987ee64..2352cee7f645 100644 --- a/drivers/clk/spear/spear1340_clock.c +++ b/drivers/clk/spear/spear1340_clock.c @@ -369,8 +369,8 @@ static struct frac_rate_tbl gen_rtbl[] = { /* clock parents */ static const char *vco_parents[] = { "osc_24m_clk", "osc_25m_clk", }; -static const char *sys_parents[] = { "none", "pll1_clk", "none", "none", - "sys_syn_clk", "none", "pll2_clk", "pll3_clk", }; +static const char *sys_parents[] = { "pll1_clk", "pll1_clk", "pll1_clk", + "pll1_clk", "sys_synth_clk", "sys_synth_clk", "pll2_clk", "pll3_clk", }; static const char *ahb_parents[] = { "cpu_div3_clk", "amba_syn_clk", }; static const char *gpt_parents[] = { "osc_24m_clk", "apb_clk", }; static const char *uart0_parents[] = { "pll5_clk", "osc_24m_clk", -- cgit v1.2.3 From 45a5e119ad781afca186fd5dccdb70c3c70057a7 Mon Sep 17 00:00:00 2001 From: Vipul Kumar Samar Date: Fri, 13 Jul 2012 17:20:46 +0530 Subject: ARM: dts: SPEAr320: Fix compatible string Signed-off-by: Vipul Kumar Samar Signed-off-by: Shiraz Hashim Acked-by: Viresh Kumar --- arch/arm/boot/dts/spear320-evb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts index c13fd1f3b09f..f28bea8a3ce2 100644 --- a/arch/arm/boot/dts/spear320-evb.dts +++ b/arch/arm/boot/dts/spear320-evb.dts @@ -15,8 +15,8 @@ /include/ "spear320.dtsi" / { - model = "ST SPEAr300 Evaluation Board"; - compatible = "st,spear300-evb", "st,spear300"; + model = "ST SPEAr320 Evaluation Board"; + compatible = "st,spear320-evb", "st,spear320"; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From 69da52f7eabbb9808ccaf7098ae676429f924e7d Mon Sep 17 00:00:00 2001 From: Vipul Kumar Samar Date: Fri, 13 Jul 2012 17:22:11 +0530 Subject: ARM: dts: SPEAr320: Boot the board in EXTENDED_MODE On spear320 device supported mode are: * AUTO_NET_SMII_MODE * AUTO_NET_MII_MODE * AUTO_EXP_MODE * SMALL_PRINTERS_MODE * EXTENDED_MODE spear320-evb board is designed for EXTENDED_MODE only, hence it does not boot correctly in current form where pinctrl part for some devices fail. Configure and boot the SPEAr320 evaluation board in EXTENDED_MODE. Signed-off-by: Vipul Kumar Samar Signed-off-by: Shiraz Hashim Acked-by: Viresh Kumar --- arch/arm/boot/dts/spear320-evb.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts index f28bea8a3ce2..e4e912f95024 100644 --- a/arch/arm/boot/dts/spear320-evb.dts +++ b/arch/arm/boot/dts/spear320-evb.dts @@ -26,7 +26,7 @@ ahb { pinmux@b3000000 { - st,pinmux-mode = <3>; + st,pinmux-mode = <4>; pinctrl-names = "default"; pinctrl-0 = <&state_default>; -- cgit v1.2.3 From 69c7e3772eaee5d2097725cdb79bc3ef867c0d9e Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Fri, 11 May 2012 10:41:01 +0200 Subject: ARM: SPEAr600: Fix timer interrupt definition in spear600.dtsi Signed-off-by: Stefan Roese Signed-off-by: Shiraz Hashim Acked-by: Viresh Kumar --- arch/arm/boot/dts/spear600.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi index 089f0a42c50e..a3c36e47d7ef 100644 --- a/arch/arm/boot/dts/spear600.dtsi +++ b/arch/arm/boot/dts/spear600.dtsi @@ -181,6 +181,7 @@ timer@f0000000 { compatible = "st,spear-timer"; reg = <0xf0000000 0x400>; + interrupt-parent = <&vic0>; interrupts = <16>; }; }; -- cgit v1.2.3 From 331ae4962b975246944ea039697a8f1cadce42bb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jul 2012 09:31:36 +0100 Subject: ext4: fix duplicated mnt_drop_write call in EXT4_IOC_MOVE_EXT Caused, AFAICS, by mismerge in commit ff9cb1c4eead ("Merge branch 'for_linus' into for_linus_merged") Signed-off-by: Al Viro Cc: Theodore Ts'o Cc: stable@vger.kernel.org # 3.3+ Signed-off-by: Linus Torvalds --- fs/ext4/ioctl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e34deac3f366..6ec6f9ee2fec 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -268,7 +268,6 @@ group_extend_out: err = ext4_move_extents(filp, donor_filp, me.orig_start, me.donor_start, me.len, &me.moved_len); mnt_drop_write_file(filp); - mnt_drop_write(filp->f_path.mnt); if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) -- cgit v1.2.3 From 89d7ae34cdda4195809a5a987f697a517a2a3177 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 17 Jul 2012 11:07:47 +0000 Subject: cipso: don't follow a NULL pointer when setsockopt() is called As reported by Alan Cox, and verified by Lin Ming, when a user attempts to add a CIPSO option to a socket using the CIPSO_V4_TAG_LOCAL tag the kernel dies a terrible death when it attempts to follow a NULL pointer (the skb argument to cipso_v4_validate() is NULL when called via the setsockopt() syscall). This patch fixes this by first checking to ensure that the skb is non-NULL before using it to find the incoming network interface. In the unlikely case where the skb is NULL and the user attempts to add a CIPSO option with the _TAG_LOCAL tag we return an error as this is not something we want to allow. A simple reproducer, kindly supplied by Lin Ming, although you must have the CIPSO DOI #3 configure on the system first or you will be caught early in cipso_v4_validate(): #include #include #include #include #include struct local_tag { char type; char length; char info[4]; }; struct cipso { char type; char length; char doi[4]; struct local_tag local; }; int main(int argc, char **argv) { int sockfd; struct cipso cipso = { .type = IPOPT_CIPSO, .length = sizeof(struct cipso), .local = { .type = 128, .length = sizeof(struct local_tag), }, }; memset(cipso.doi, 0, 4); cipso.doi[3] = 3; sockfd = socket(AF_INET, SOCK_DGRAM, 0); #define SOL_IP 0 setsockopt(sockfd, SOL_IP, IP_OPTIONS, &cipso, sizeof(struct cipso)); return 0; } CC: Lin Ming Reported-by: Alan Cox Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index c48adc565e92..667c1d4ca984 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1725,8 +1725,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) case CIPSO_V4_TAG_LOCAL: /* This is a non-standard tag that we only allow for * local connections, so if the incoming interface is - * not the loopback device drop the packet. */ - if (!(skb->dev->flags & IFF_LOOPBACK)) { + * not the loopback device drop the packet. Further, + * there is no legitimate reason for setting this from + * userspace so reject it if skb is NULL. */ + if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) { err_offset = opt_iter; goto validate_return_locked; } -- cgit v1.2.3 From 2ab1c24bbd1bae22e0a54beba0cbb12272d940e4 Mon Sep 17 00:00:00 2001 From: Anirban Chakraborty Date: Tue, 17 Jul 2012 09:22:09 +0000 Subject: MAINTAINERS: Changes in qlcnic and qlge maintainers list Please apply. Thanks. Signed-off-by: Anirban Chakraborty Signed-off-by: David S. Miller --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c82c343168e8..fe643e7b9df6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5565,7 +5565,7 @@ F: Documentation/networking/LICENSE.qla3xxx F: drivers/net/ethernet/qlogic/qla3xxx.* QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER -M: Anirban Chakraborty +M: Jitendra Kalsaria M: Sony Chacko M: linux-driver@qlogic.com L: netdev@vger.kernel.org @@ -5573,7 +5573,6 @@ S: Supported F: drivers/net/ethernet/qlogic/qlcnic/ QLOGIC QLGE 10Gb ETHERNET DRIVER -M: Anirban Chakraborty M: Jitendra Kalsaria M: Ron Mercer M: linux-driver@qlogic.com -- cgit v1.2.3 From c1e3209623ccd92f2f391a31ecf3895daa0238f3 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 11 Jul 2012 14:12:45 +0200 Subject: v4l2-dev: forgot to add VIDIOC_DV_TIMINGS_CAP. The VIDIOC_DV_TIMINGS_CAP ioctl check wasn't added to determine_valid_ioctls(). This caused this ioctl to always return -ENOTTY. The cause for this was that for 3.5 two patch series were merged, one changing V4L2 core ioctl handling and one adding new functionality, and some of the new functionality wasn't handled by the new V4L2 core code. Signed-off-by: Hans Verkuil [ Taking it directly due to vacations - Linus ] Signed-off-by: Linus Torvalds --- drivers/media/video/v4l2-dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c index 83dbb2ddff10..0cbada18f6f5 100644 --- a/drivers/media/video/v4l2-dev.c +++ b/drivers/media/video/v4l2-dev.c @@ -681,6 +681,7 @@ static void determine_valid_ioctls(struct video_device *vdev) SET_VALID_IOCTL(ops, VIDIOC_G_DV_TIMINGS, vidioc_g_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_ENUM_DV_TIMINGS, vidioc_enum_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_QUERY_DV_TIMINGS, vidioc_query_dv_timings); + SET_VALID_IOCTL(ops, VIDIOC_DV_TIMINGS_CAP, vidioc_dv_timings_cap); /* yes, really vidioc_subscribe_event */ SET_VALID_IOCTL(ops, VIDIOC_DQEVENT, vidioc_subscribe_event); SET_VALID_IOCTL(ops, VIDIOC_SUBSCRIBE_EVENT, vidioc_subscribe_event); -- cgit v1.2.3 From 734b65417b24d6eea3e3d7457e1f11493890ee1d Mon Sep 17 00:00:00 2001 From: "Rustad, Mark D" Date: Wed, 18 Jul 2012 09:06:07 +0000 Subject: net: Statically initialize init_net.dev_base_head This change eliminates an initialization-order hazard most recently seen when netprio_cgroup is built into the kernel. With thanks to Eric Dumazet for catching a bug. Signed-off-by: Mark Rustad Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- net/core/net_namespace.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 0f28a9e0b8ad..1cb0d8a6aa6c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6283,7 +6283,8 @@ static struct hlist_head *netdev_create_hash(void) /* Initialize per network namespace state */ static int __net_init netdev_init(struct net *net) { - INIT_LIST_HEAD(&net->dev_base_head); + if (net != &init_net) + INIT_LIST_HEAD(&net->dev_base_head); net->dev_name_head = netdev_create_hash(); if (net->dev_name_head == NULL) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index dddbacb8f28c..42f1e1c7514f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -27,7 +27,9 @@ static DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); -struct net init_net; +struct net init_net = { + .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), +}; EXPORT_SYMBOL(init_net); #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ -- cgit v1.2.3 From eea03c20ae38a55405c0865ed9adfccc400e4c8e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 18 Jul 2012 18:15:46 -0700 Subject: Make wait_for_device_probe() also do scsi_complete_async_scans() Commit a7a20d103994 ("sd: limit the scope of the async probe domain") make the SCSI device probing run device discovery in it's own async domain. However, as a result, the partition detection was no longer synchronized by async_synchronize_full() (which, despite the name, only synchronizes the global async space, not all of them). Which in turn meant that "wait_for_device_probe()" would not wait for the SCSI partitions to be parsed. And "wait_for_device_probe()" was what the boot time init code relied on for mounting the root filesystem. Now, most people never noticed this, because not only is it timing-dependent, but modern distributions all use initrd. So the root filesystem isn't actually on a disk at all. And then before they actually mount the final disk filesystem, they will have loaded the scsi-wait-scan module, which not only does the expected wait_for_device_probe(), but also does scsi_complete_async_scans(). [ Side note: scsi_complete_async_scans() had also been partially broken, but that was fixed in commit 43a8d39d0137 ("fix async probe regression"), so that same commit a7a20d103994 had actually broken setups even if you used scsi-wait-scan explicitly ] Solve this problem by just moving the scsi_complete_async_scans() call into wait_for_device_probe(). Everybody who wants to wait for device probing to finish really wants the SCSI probing to complete, so there's no reason not to do this. So now "wait_for_device_probe()" really does what the name implies, and properly waits for device probing to finish. This also removes the now unnecessary extra calls to scsi_complete_async_scans(). Reported-and-tested-by: Artem S. Tashkinov Cc: Dan Williams Cc: Alan Stern Cc: James Bottomley Cc: Borislav Petkov Cc: linux-scsi Signed-off-by: Linus Torvalds --- drivers/base/dd.c | 2 ++ drivers/scsi/scsi_wait_scan.c | 5 ----- include/linux/device.h | 2 -- kernel/power/hibernate.c | 8 -------- kernel/power/user.c | 2 -- 5 files changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index dcb8a6e48692..4b01ab3d2c24 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "base.h" #include "power/power.h" @@ -332,6 +333,7 @@ void wait_for_device_probe(void) /* wait for the known devices to complete their probing */ wait_event(probe_waitqueue, atomic_read(&probe_count) == 0); async_synchronize_full(); + scsi_complete_async_scans(); } EXPORT_SYMBOL_GPL(wait_for_device_probe); diff --git a/drivers/scsi/scsi_wait_scan.c b/drivers/scsi/scsi_wait_scan.c index ae7814874618..072734538876 100644 --- a/drivers/scsi/scsi_wait_scan.c +++ b/drivers/scsi/scsi_wait_scan.c @@ -22,11 +22,6 @@ static int __init wait_scan_init(void) * and might not yet have reached the scsi async scanning */ wait_for_device_probe(); - /* - * and then we wait for the actual asynchronous scsi scan - * to finish. - */ - scsi_complete_async_scans(); return 0; } diff --git a/include/linux/device.h b/include/linux/device.h index 161d96241b1b..6de94151ff6f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -865,8 +865,6 @@ extern int (*platform_notify_remove)(struct device *dev); extern struct device *get_device(struct device *dev); extern void put_device(struct device *dev); -extern void wait_for_device_probe(void); - #ifdef CONFIG_DEVTMPFS extern int devtmpfs_create_node(struct device *dev); extern int devtmpfs_delete_node(struct device *dev); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8b53db38a279..238025f5472e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "power.h" @@ -748,13 +747,6 @@ static int software_resume(void) async_synchronize_full(); } - /* - * We can't depend on SCSI devices being available after loading - * one of their modules until scsi_complete_async_scans() is - * called and the resume device usually is a SCSI one. - */ - scsi_complete_async_scans(); - swsusp_resume_device = name_to_dev_t(resume_file); if (!swsusp_resume_device) { error = -ENODEV; diff --git a/kernel/power/user.c b/kernel/power/user.c index 91b0fd021a95..4ed81e74f86f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -24,7 +24,6 @@ #include #include #include -#include #include @@ -84,7 +83,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) * appear. */ wait_for_device_probe(); - scsi_complete_async_scans(); data->swap = -1; data->mode = O_WRONLY; -- cgit v1.2.3 From 893a0574de0c90a4e52c8f7070023b2eb58cd220 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Wed, 18 Jul 2012 14:12:01 -0700 Subject: mips: fix bug.h build regression Commit 377780887 ("bug.h: need linux/kernel.h for TAINT_WARN.") broke all MIPS builds: CC arch/mips/kernel/machine_kexec.o include/linux/log2.h: In function '__ilog2_u32': include/linux/log2.h:34:2: error: implicit declaration of function 'fls' [-Werror=implicit-function-declaration] include/linux/log2.h: In function '__ilog2_u64': include/linux/log2.h:42:2: error: implicit declaration of function 'fls64' [-Werror=implicit-function-declaration] ... Signed-off-by: Yoichi Yuasa Tested-by: John Crispin Cc: Ralf Baechle Cc: David Daney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/bitops.h | 1 - arch/mips/include/asm/io.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 2e1ad4c652b7..82ad35ce2b45 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -17,7 +17,6 @@ #include #include #include -#include #include /* sigh ... */ #include #include diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index a58f22998a86..29d9c23c20c7 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -17,6 +17,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From b1bdd2eb31309a3b61235613041180cd50be19a0 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Wed, 18 Jul 2012 14:12:04 -0700 Subject: kexec: update URL of kexec homepage The referenced html file does not exist anymore. Replace the URL with the current project homepage. Signed-off-by: Olaf Hering Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kdump/kdump.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 506c7390c2b9..13f1aa09b938 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt @@ -86,7 +86,7 @@ There is also a gitweb interface available at http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git More information about kexec-tools can be found at -http://www.kernel.org/pub/linux/utils/kernel/kexec/README.html +http://horms.net/projects/kexec/ 3) Unpack the tarball with the tar command, as follows: -- cgit v1.2.3 From 25f7fd470bc97bb93d3a674e8c56c4a29063ec97 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 19 Jul 2012 15:59:18 +1000 Subject: md: fix bug in handling of new_data_offset commit c6563a8c38fde3c1c7fc925a10bde3ca20799301 md: add possibility to change data-offset for devices. introduced a 'new_data_offset' attribute which should normally be the same as 'data_offset', but can be explicitly set to a different value to allow a reshape operation to move the data. Unfortunately when the 'data_offset' is explicitly set through sysfs, the new_data_offset is not also set, so the two would become out-of-sync incorrectly. One result of this is that trying to set the 'size' after the 'data_offset' would fail because it is not permitted to set the size when the 'data_offset' and 'new_data_offset' are different - as that can be confusing. Consequently when mdadm tried to do this while assembling an IMSM array it would fail. This bug was introduced in 3.5-rc1. Reported-by: Brian Downing Bisected-by: Brian Downing Tested-by: Brian Downing Signed-off-by: NeilBrown --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index a4c219e3c859..0c1fe4cbce6b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2931,6 +2931,7 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len) * can be sane */ return -EBUSY; rdev->data_offset = offset; + rdev->new_data_offset = offset; return len; } -- cgit v1.2.3 From a05b7ea03d72f36edb0cec05e8893803335c61a0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 19 Jul 2012 15:59:18 +1000 Subject: md: avoid crash when stopping md array races with closing other open fds. md will refuse to stop an array if any other fd (or mounted fs) is using it. When any fs is unmounted of when the last open fd is closed all pending IO will be flushed (e.g. sync_blockdev call in __blkdev_put) so there will be no pending IO to worry about when the array is stopped. However in order to send the STOP_ARRAY ioctl to stop the array one must first get and open fd on the block device. If some fd is being used to write to the block device and it is closed after mdadm open the block device, but before mdadm issues the STOP_ARRAY ioctl, then there will be no last-close on the md device so __blkdev_put will not call sync_blockdev. If this happens, then IO can still be in-flight while md tears down the array and bad things can happen (use-after-free and subsequent havoc). So in the case where do_md_stop is being called from an open file descriptor, call sync_block after taking the mutex to ensure there will be no new openers. This is needed when setting a read-write device to read-only too. Cc: stable@vger.kernel.org Reported-by: majianpeng Signed-off-by: NeilBrown --- drivers/md/md.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 0c1fe4cbce6b..d5ab4493c8be 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3927,8 +3927,8 @@ array_state_show(struct mddev *mddev, char *page) return sprintf(page, "%s\n", array_states[st]); } -static int do_md_stop(struct mddev * mddev, int ro, int is_open); -static int md_set_readonly(struct mddev * mddev, int is_open); +static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev); +static int md_set_readonly(struct mddev * mddev, struct block_device *bdev); static int do_md_run(struct mddev * mddev); static int restart_array(struct mddev *mddev); @@ -3944,14 +3944,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) /* stopping an active array */ if (atomic_read(&mddev->openers) > 0) return -EBUSY; - err = do_md_stop(mddev, 0, 0); + err = do_md_stop(mddev, 0, NULL); break; case inactive: /* stopping an active array */ if (mddev->pers) { if (atomic_read(&mddev->openers) > 0) return -EBUSY; - err = do_md_stop(mddev, 2, 0); + err = do_md_stop(mddev, 2, NULL); } else err = 0; /* already inactive */ break; @@ -3959,7 +3959,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) break; /* not supported yet */ case readonly: if (mddev->pers) - err = md_set_readonly(mddev, 0); + err = md_set_readonly(mddev, NULL); else { mddev->ro = 1; set_disk_ro(mddev->gendisk, 1); @@ -3969,7 +3969,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) case read_auto: if (mddev->pers) { if (mddev->ro == 0) - err = md_set_readonly(mddev, 0); + err = md_set_readonly(mddev, NULL); else if (mddev->ro == 1) err = restart_array(mddev); if (err == 0) { @@ -5352,15 +5352,17 @@ void md_stop(struct mddev *mddev) } EXPORT_SYMBOL_GPL(md_stop); -static int md_set_readonly(struct mddev *mddev, int is_open) +static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) { int err = 0; mutex_lock(&mddev->open_mutex); - if (atomic_read(&mddev->openers) > is_open) { + if (atomic_read(&mddev->openers) > !!bdev) { printk("md: %s still in use.\n",mdname(mddev)); err = -EBUSY; goto out; } + if (bdev) + sync_blockdev(bdev); if (mddev->pers) { __md_stop_writes(mddev); @@ -5382,18 +5384,26 @@ out: * 0 - completely stop and dis-assemble array * 2 - stop but do not disassemble array */ -static int do_md_stop(struct mddev * mddev, int mode, int is_open) +static int do_md_stop(struct mddev * mddev, int mode, + struct block_device *bdev) { struct gendisk *disk = mddev->gendisk; struct md_rdev *rdev; mutex_lock(&mddev->open_mutex); - if (atomic_read(&mddev->openers) > is_open || + if (atomic_read(&mddev->openers) > !!bdev || mddev->sysfs_active) { printk("md: %s still in use.\n",mdname(mddev)); mutex_unlock(&mddev->open_mutex); return -EBUSY; } + if (bdev) + /* It is possible IO was issued on some other + * open file which was closed before we took ->open_mutex. + * As that was not the last close __blkdev_put will not + * have called sync_blockdev, so we must. + */ + sync_blockdev(bdev); if (mddev->pers) { if (mddev->ro) @@ -5467,7 +5477,7 @@ static void autorun_array(struct mddev *mddev) err = do_md_run(mddev); if (err) { printk(KERN_WARNING "md: do_md_run() returned %d\n", err); - do_md_stop(mddev, 0, 0); + do_md_stop(mddev, 0, NULL); } } @@ -6482,11 +6492,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, goto done_unlock; case STOP_ARRAY: - err = do_md_stop(mddev, 0, 1); + err = do_md_stop(mddev, 0, bdev); goto done_unlock; case STOP_ARRAY_RO: - err = md_set_readonly(mddev, 1); + err = md_set_readonly(mddev, bdev); goto done_unlock; case BLKROSET: -- cgit v1.2.3 From 58e94ae18478c08229626daece2fc108a4a23261 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 19 Jul 2012 15:59:18 +1000 Subject: md/raid1: close some possible races on write errors during resync commit 4367af556133723d0f443e14ca8170d9447317cb md/raid1: clear bad-block record when write succeeds. Added a 'reschedule_retry' call possibility at the end of end_sync_write, but didn't add matching code at the end of sync_request_write. So if the writes complete very quickly, or scheduling makes it seem that way, then we can miss rescheduling the request and the resync could hang. Also commit 73d5c38a9536142e062c35997b044e89166e063b md: avoid races when stopping resync. Fix a race condition in this same code in end_sync_write but didn't make the change in sync_request_write. This patch updates sync_request_write to fix both of those. Patch is suitable for 3.1 and later kernels. Reported-by: Alexander Lyakas Original-version-by: Alexander Lyakas Cc: stable@vger.kernel.org Signed-off-by: NeilBrown --- drivers/md/raid1.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 240ff3125040..cacd008d6864 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1818,8 +1818,14 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) if (atomic_dec_and_test(&r1_bio->remaining)) { /* if we're here, all write(s) have completed, so clean up */ - md_done_sync(mddev, r1_bio->sectors, 1); - put_buf(r1_bio); + int s = r1_bio->sectors; + if (test_bit(R1BIO_MadeGood, &r1_bio->state) || + test_bit(R1BIO_WriteError, &r1_bio->state)) + reschedule_retry(r1_bio); + else { + put_buf(r1_bio); + md_done_sync(mddev, s, 1); + } } } -- cgit v1.2.3 From 78d4803f75277f78ab4fc3be7ad462d78f726df9 Mon Sep 17 00:00:00 2001 From: Leonid Yegoshin Date: Fri, 6 Jul 2012 21:56:01 +0200 Subject: MIPS: Don't panic on 5KEc. It's a bloody bog standard MIPS64R2 core with just a new PrId ID. Iow that essentially means Linux just panics because it doesn't know how to name the core. [ralf@linux-mips.org: Split original patch into several smaller patches.] Signed-off-by: Leonid Yegoshin Signed-off-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3792/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cpu.h | 2 +- arch/mips/kernel/cpu-probe.c | 4 ++++ arch/mips/kernel/traps.c | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index c64910586b74..95e40c1e8ed1 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -266,7 +266,7 @@ enum cpu_type_enum { /* * MIPS64 class processors */ - CPU_5KC, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2, + CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2, CPU_XLR, CPU_XLP, diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index aaf39f3eaa51..f4630e1082ab 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -811,6 +811,10 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_5KC; __cpu_name[cpu] = "MIPS 5Kc"; break; + case PRID_IMP_5KE: + c->cputype = CPU_5KE; + __cpu_name[cpu] = "MIPS 5KE"; + break; case PRID_IMP_20KC: c->cputype = CPU_20KC; __cpu_name[cpu] = "MIPS 20Kc"; diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index f985b7292cd9..ce95f2c41f3f 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1249,6 +1249,7 @@ static inline void parity_protection_init(void) break; case CPU_5KC: + case CPU_5KE: write_c0_ecc(0x80000000); back_to_back_c0_hazard(); /* Set the PE bit (bit 31) in the c0_errctl register. */ -- cgit v1.2.3 From c022630633624a75b3b58f43dd3c6cc896a56cff Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Fri, 6 Jul 2012 21:56:01 +0200 Subject: MIPS: Refactor 'clear_page' and 'copy_page' functions. Remove usage of the '__attribute__((alias("...")))' hack that aliased to integer arrays containing micro-assembled instructions. This hack breaks when building a microMIPS kernel. It also makes the code much easier to understand. [ralf@linux-mips.org: Added back export of the clear_page and copy_page symbols so certain modules will work again. Also fixed build with CONFIG_SIBYTE_DMA_PAGEOPS enabled.] Signed-off-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3866/ Acked-by: David Daney Signed-off-by: Ralf Baechle --- arch/mips/kernel/mips_ksyms.c | 8 +++++- arch/mips/mm/Makefile | 4 +-- arch/mips/mm/page-funcs.S | 50 ++++++++++++++++++++++++++++++++ arch/mips/mm/page.c | 67 ++++++++++++------------------------------- 4 files changed, 77 insertions(+), 52 deletions(-) create mode 100644 arch/mips/mm/page-funcs.S diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c index 57ba13edb03a..3fc1691110dc 100644 --- a/arch/mips/kernel/mips_ksyms.c +++ b/arch/mips/kernel/mips_ksyms.c @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1996, 97, 98, 99, 2000, 01, 03, 04, 05 by Ralf Baechle + * Copyright (C) 1996, 97, 98, 99, 2000, 01, 03, 04, 05, 12 by Ralf Baechle * Copyright (C) 1999, 2000, 01 Silicon Graphics, Inc. */ #include @@ -34,6 +34,12 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(kernel_thread); +/* + * Functions that operate on entire pages. Mostly used by memory management. + */ +EXPORT_SYMBOL(clear_page); +EXPORT_SYMBOL(copy_page); + /* * Userspace access stuff. */ diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index 4aa20280613e..fd6203f14f1f 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -3,8 +3,8 @@ # obj-y += cache.o dma-default.o extable.o fault.o \ - gup.o init.o mmap.o page.o tlbex.o \ - tlbex-fault.o uasm.o + gup.o init.o mmap.o page.o page-funcs.o \ + tlbex.o tlbex-fault.o uasm.o obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o obj-$(CONFIG_64BIT) += pgtable-64.o diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S new file mode 100644 index 000000000000..48a6b38ff13e --- /dev/null +++ b/arch/mips/mm/page-funcs.S @@ -0,0 +1,50 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Micro-assembler generated clear_page/copy_page functions. + * + * Copyright (C) 2012 MIPS Technologies, Inc. + * Copyright (C) 2012 Ralf Baechle + */ +#include +#include + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +#define cpu_clear_page_function_name clear_page_cpu +#define cpu_copy_page_function_name copy_page_cpu +#else +#define cpu_clear_page_function_name clear_page +#define cpu_copy_page_function_name copy_page +#endif + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x058 bytes + * R4600 v1.7: 0x05c bytes + * R4600 v2.0: 0x060 bytes + * With prefetching, 16 word strides 0x120 bytes + */ +EXPORT(__clear_page_start) +LEAF(cpu_clear_page_function_name) +1: j 1b /* Dummy, will be replaced. */ + .space 288 +END(cpu_clear_page_function_name) +EXPORT(__clear_page_end) + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x11c bytes + * R4600 v1.7: 0x080 bytes + * R4600 v2.0: 0x07c bytes + * With prefetching, 16 word strides 0x540 bytes + */ +EXPORT(__copy_page_start) +LEAF(cpu_copy_page_function_name) +1: j 1b /* Dummy, will be replaced. */ + .space 1344 +END(cpu_copy_page_function_name) +EXPORT(__copy_page_end) diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index cc0b626858b3..98f530e18216 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -6,6 +6,7 @@ * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 2007 Maciej W. Rozycki * Copyright (C) 2008 Thiemo Seufer + * Copyright (C) 2012 MIPS Technologies, Inc. */ #include #include @@ -71,45 +72,6 @@ static struct uasm_reloc __cpuinitdata relocs[5]; #define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) #define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) -/* - * Maximum sizes: - * - * R4000 128 bytes S-cache: 0x058 bytes - * R4600 v1.7: 0x05c bytes - * R4600 v2.0: 0x060 bytes - * With prefetching, 16 word strides 0x120 bytes - */ - -static u32 clear_page_array[0x120 / 4]; - -#ifdef CONFIG_SIBYTE_DMA_PAGEOPS -void clear_page_cpu(void *page) __attribute__((alias("clear_page_array"))); -#else -void clear_page(void *page) __attribute__((alias("clear_page_array"))); -#endif - -EXPORT_SYMBOL(clear_page); - -/* - * Maximum sizes: - * - * R4000 128 bytes S-cache: 0x11c bytes - * R4600 v1.7: 0x080 bytes - * R4600 v2.0: 0x07c bytes - * With prefetching, 16 word strides 0x540 bytes - */ -static u32 copy_page_array[0x540 / 4]; - -#ifdef CONFIG_SIBYTE_DMA_PAGEOPS -void -copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array"))); -#else -void copy_page(void *to, void *from) __attribute__((alias("copy_page_array"))); -#endif - -EXPORT_SYMBOL(copy_page); - - static int pref_bias_clear_store __cpuinitdata; static int pref_bias_copy_load __cpuinitdata; static int pref_bias_copy_store __cpuinitdata; @@ -282,10 +244,15 @@ static inline void __cpuinit build_clear_pref(u32 **buf, int off) } } +extern u32 __clear_page_start; +extern u32 __clear_page_end; +extern u32 __copy_page_start; +extern u32 __copy_page_end; + void __cpuinit build_clear_page(void) { int off; - u32 *buf = (u32 *)&clear_page_array; + u32 *buf = &__clear_page_start; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; int i; @@ -356,17 +323,17 @@ void __cpuinit build_clear_page(void) uasm_i_jr(&buf, RA); uasm_i_nop(&buf); - BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array)); + BUG_ON(buf > &__clear_page_end); uasm_resolve_relocs(relocs, labels); pr_debug("Synthesized clear page handler (%u instructions).\n", - (u32)(buf - clear_page_array)); + (u32)(buf - &__clear_page_start)); pr_debug("\t.set push\n"); pr_debug("\t.set noreorder\n"); - for (i = 0; i < (buf - clear_page_array); i++) - pr_debug("\t.word 0x%08x\n", clear_page_array[i]); + for (i = 0; i < (buf - &__clear_page_start); i++) + pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]); pr_debug("\t.set pop\n"); } @@ -427,7 +394,7 @@ static inline void build_copy_store_pref(u32 **buf, int off) void __cpuinit build_copy_page(void) { int off; - u32 *buf = (u32 *)©_page_array; + u32 *buf = &__copy_page_start; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; int i; @@ -595,21 +562,23 @@ void __cpuinit build_copy_page(void) uasm_i_jr(&buf, RA); uasm_i_nop(&buf); - BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array)); + BUG_ON(buf > &__copy_page_end); uasm_resolve_relocs(relocs, labels); pr_debug("Synthesized copy page handler (%u instructions).\n", - (u32)(buf - copy_page_array)); + (u32)(buf - &__copy_page_start)); pr_debug("\t.set push\n"); pr_debug("\t.set noreorder\n"); - for (i = 0; i < (buf - copy_page_array); i++) - pr_debug("\t.word 0x%08x\n", copy_page_array[i]); + for (i = 0; i < (buf - &__copy_page_start); i++) + pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]); pr_debug("\t.set pop\n"); } #ifdef CONFIG_SIBYTE_DMA_PAGEOPS +extern void clear_page_cpu(void *page); +extern void copy_page_cpu(void *to, void *from); /* * Pad descriptors to cacheline, since each is exclusively owned by a -- cgit v1.2.3 From dc34b05fea0cc9a869863b929f37f1e8ce30edf4 Mon Sep 17 00:00:00 2001 From: Douglas Leung Date: Thu, 19 Jul 2012 09:11:13 +0200 Subject: MIPS: Fix decoding of c0_config1 for MIPSxx caches with 32 ways per set. This affects certain 4Kc cores. Signed-off-by: Douglas Leung Signed-off-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3855/ Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index e56efd059189..f092c265dc63 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -977,7 +977,7 @@ static void __cpuinit probe_pcache(void) c->icache.linesz = 2 << lsize; else c->icache.linesz = lsize; - c->icache.sets = 64 << ((config1 >> 22) & 7); + c->icache.sets = 32 << (((config1 >> 22) + 1) & 7); c->icache.ways = 1 + ((config1 >> 16) & 7); icache_size = c->icache.sets * @@ -997,7 +997,7 @@ static void __cpuinit probe_pcache(void) c->dcache.linesz = 2 << lsize; else c->dcache.linesz= lsize; - c->dcache.sets = 64 << ((config1 >> 13) & 7); + c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7); c->dcache.ways = 1 + ((config1 >> 7) & 7); dcache_size = c->dcache.sets * -- cgit v1.2.3 From 2dd17030c940ef1199a07b095ec79c4660fa8734 Mon Sep 17 00:00:00 2001 From: Leonid Yegoshin Date: Thu, 19 Jul 2012 09:11:14 +0200 Subject: MIPS: Fix race condition with FPU thread task flag during context switch. [ralf@linux-mips.org: Cosmetic changes; also fixed up r2300_switch.S and octeon_switch.S which needed similar modifications.] Signed-off-by: Leonid Yegoshin Signed-off-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3784/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/switch_to.h | 6 ++++-- arch/mips/kernel/octeon_switch.S | 2 +- arch/mips/kernel/r2300_switch.S | 15 +++------------ arch/mips/kernel/r4k_switch.S | 12 +++--------- 4 files changed, 11 insertions(+), 24 deletions(-) diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index 5d33621b5658..4f8ddba8c360 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -22,7 +22,7 @@ struct task_struct; * switch_to(n) should switch tasks to task nr n, first * checking that n isn't the current task, in which case it does nothing. */ -extern asmlinkage void *resume(void *last, void *next, void *next_ti); +extern asmlinkage void *resume(void *last, void *next, void *next_ti, u32 __usedfpu); extern unsigned int ll_bit; extern struct task_struct *ll_task; @@ -66,11 +66,13 @@ do { \ #define switch_to(prev, next, last) \ do { \ + u32 __usedfpu; \ __mips_mt_fpaff_switch_to(prev); \ if (cpu_has_dsp) \ __save_dsp(prev); \ __clear_software_ll_bit(); \ - (last) = resume(prev, next, task_thread_info(next)); \ + __usedfpu = test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU); \ + (last) = resume(prev, next, task_thread_info(next), __usedfpu); \ } while (0) #define finish_arch_switch(prev) \ diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S index ce89c8061708..0441f54b2a6a 100644 --- a/arch/mips/kernel/octeon_switch.S +++ b/arch/mips/kernel/octeon_switch.S @@ -31,7 +31,7 @@ /* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti) + * struct thread_info *next_ti, int usedfpu) */ .align 7 LEAF(resume) diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S index 293898391e67..9c51be5a163a 100644 --- a/arch/mips/kernel/r2300_switch.S +++ b/arch/mips/kernel/r2300_switch.S @@ -43,7 +43,7 @@ /* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti) ) + * struct thread_info *next_ti, int usedfpu) */ LEAF(resume) mfc0 t1, CP0_STATUS @@ -51,18 +51,9 @@ LEAF(resume) cpu_save_nonscratch a0 sw ra, THREAD_REG31(a0) - /* - * check if we need to save FPU registers - */ - lw t3, TASK_THREAD_INFO(a0) - lw t0, TI_FLAGS(t3) - li t1, _TIF_USEDFPU - and t2, t0, t1 - beqz t2, 1f - nor t1, zero, t1 + beqz a3, 1f - and t0, t0, t1 - sw t0, TI_FLAGS(t3) + PTR_L t3, TASK_THREAD_INFO(a0) /* * clear saved user stack CU1 bit diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S index 9414f9354469..42d2a3938420 100644 --- a/arch/mips/kernel/r4k_switch.S +++ b/arch/mips/kernel/r4k_switch.S @@ -41,7 +41,7 @@ /* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti) + * struct thread_info *next_ti, int usedfpu) */ .align 5 LEAF(resume) @@ -53,16 +53,10 @@ /* * check if we need to save FPU registers */ - PTR_L t3, TASK_THREAD_INFO(a0) - LONG_L t0, TI_FLAGS(t3) - li t1, _TIF_USEDFPU - and t2, t0, t1 - beqz t2, 1f - nor t1, zero, t1 - and t0, t0, t1 - LONG_S t0, TI_FLAGS(t3) + beqz a3, 1f + PTR_L t3, TASK_THREAD_INFO(a0) /* * clear saved user stack CU1 bit */ -- cgit v1.2.3 From c5de50dada1403e1abc8cd4a8c9a1283c859e0bb Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Thu, 19 Jul 2012 09:11:14 +0200 Subject: MIPS: Malta: Change start address to avoid conflicts. There are ACPI and SMB devices in the 0x1000..0x1fff address range. Signed-off-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3581/ Signed-off-by: Ralf Baechle --- arch/mips/mti-malta/malta-pci.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/mips/mti-malta/malta-pci.c b/arch/mips/mti-malta/malta-pci.c index bf80921f2f56..916206823d45 100644 --- a/arch/mips/mti-malta/malta-pci.c +++ b/arch/mips/mti-malta/malta-pci.c @@ -241,8 +241,9 @@ void __init mips_pcibios_init(void) return; } - if (controller->io_resource->start < 0x00001000UL) /* FIXME */ - controller->io_resource->start = 0x00001000UL; + /* Change start address to avoid conflicts with ACPI and SMB devices */ + if (controller->io_resource->start < 0x00002000UL) + controller->io_resource->start = 0x00002000UL; iomem_resource.end &= 0xfffffffffULL; /* 64 GB */ ioport_resource.end = controller->io_resource->end; -- cgit v1.2.3 From 7b1c0d26a8e272787f0f9fcc5f3e8531df3b3409 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 19 Jul 2012 09:11:14 +0200 Subject: MIPS: Properly align the .data..init_task section. Improper alignment can lead to unbootable systems and/or random crashes. [ralf@linux-mips.org: This is a lond standing bug since 6eb10bc9e2deab06630261cd05c4cb1e9a60e980 (kernel.org) rsp. c422a10917f75fd19fa7fe070aaaa23e384dae6f (lmo) [MIPS: Clean up linker script using new linker script macros.] so dates back to 2.6.32.] Signed-off-by: David Daney Cc: linux-mips@linux-mips.org Cc: Patchwork: https://patchwork.linux-mips.org/patch/3881/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/thread_info.h | 4 ++-- arch/mips/kernel/vmlinux.lds.S | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index e2eca7d10598..ca97e0ecb64b 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -60,6 +60,8 @@ struct thread_info { register struct thread_info *__current_thread_info __asm__("$28"); #define current_thread_info() __current_thread_info +#endif /* !__ASSEMBLY__ */ + /* thread information allocation */ #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT) #define THREAD_SIZE_ORDER (1) @@ -85,8 +87,6 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define STACK_WARN (THREAD_SIZE / 8) -#endif /* !__ASSEMBLY__ */ - #define PREEMPT_ACTIVE 0x10000000 /* diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 924da5eb7031..df243a64f430 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -1,5 +1,6 @@ #include #include +#include #include #undef mips @@ -72,7 +73,7 @@ SECTIONS .data : { /* Data */ . = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */ - INIT_TASK_DATA(PAGE_SIZE) + INIT_TASK_DATA(THREAD_SIZE) NOSAVE_DATA CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) -- cgit v1.2.3 From c7b2ec2106b906e9233eefaac310ca8f4ce26934 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 19 Jul 2012 09:11:14 +0200 Subject: MIPS: SMTC: Spelling and grammar corrections. Extractd from Steven J. Hill's https://patchwork.linux-mips.org/patch/3603/. Signed-off-by: Ralf Baechle --- arch/mips/kernel/smtc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index f5dd38f1d015..b450ea529ddf 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -322,7 +322,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot) /* * Common setup before any secondaries are started - * Make sure all CPU's are in a sensible state before we boot any of the + * Make sure all CPUs are in a sensible state before we boot any of the * secondaries. * * For MIPS MT "SMTC" operation, we set up all TCs, spread as evenly @@ -340,12 +340,12 @@ static void smtc_tc_setup(int vpe, int tc, int cpu) /* * TCContext gets an offset from the base of the IPIQ array * to be used in low-level code to detect the presence of - * an active IPI queue + * an active IPI queue. */ write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16); /* Bind tc to vpe */ write_tc_c0_tcbind(vpe); - /* In general, all TCs should have the same cpu_data indications */ + /* In general, all TCs should have the same cpu_data indications. */ memcpy(&cpu_data[cpu], &cpu_data[0], sizeof(struct cpuinfo_mips)); /* For 34Kf, start with TC/CPU 0 as sole owner of single FPU context */ if (cpu_data[0].cputype == CPU_34K || @@ -358,8 +358,8 @@ static void smtc_tc_setup(int vpe, int tc, int cpu) } /* - * Tweak to get Count registes in as close a sync as possible. - * Value seems good for 34K-class cores. + * Tweak to get Count registes in as close a sync as possible. The + * value seems good for 34K-class cores. */ #define CP0_SKEW 8 -- cgit v1.2.3 From b96b62db8cec46693e192f31c2c14147212530fc Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Thu, 19 Jul 2012 09:11:15 +0200 Subject: MIPS: BCM47xx: Fix BCMA_DRIVER_PCI_HOSTMODE config dependencies warning: (BCM47XX_BCMA) selects BCMA_DRIVER_PCI_HOSTMODE which has unmet direct dependencies (BCMA_POSSIBLE && BCMA && MIPS && BCMA_HOST_PCI) warning: (BCM47XX_BCMA) selects BCMA_DRIVER_PCI_HOSTMODE which has unmet direct dependencies (BCMA_POSSIBLE && BCMA && MIPS && BCMA_HOST_PCI) Signed-off-by: Yoichi Yuasa Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3882/ Signed-off-by: Ralf Baechle --- arch/mips/bcm47xx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig index 6210b8d84109..b311be45a720 100644 --- a/arch/mips/bcm47xx/Kconfig +++ b/arch/mips/bcm47xx/Kconfig @@ -21,6 +21,7 @@ config BCM47XX_BCMA select BCMA select BCMA_HOST_SOC select BCMA_DRIVER_MIPS + select BCMA_HOST_PCI if PCI select BCMA_DRIVER_PCI_HOSTMODE if PCI default y help -- cgit v1.2.3 From 7ee91de45a9a841ac59d597901e984b859b31bbe Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Thu, 19 Jul 2012 09:11:15 +0200 Subject: MIPS: Cavium: Fix duplicate ARCH_SPARSEMEM_ENABLE in kconfig. Signed-off-by: Yoichi Yuasa Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3883/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 1 + arch/mips/cavium-octeon/Kconfig | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 09ab87ee6fef..5eb8c932fe53 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1423,6 +1423,7 @@ config CPU_SB1 config CPU_CAVIUM_OCTEON bool "Cavium Octeon processor" depends on SYS_HAS_CPU_CAVIUM_OCTEON + select ARCH_SPARSEMEM_ENABLE select CPU_HAS_PREFETCH select CPU_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_SMP diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index f9e275a50d98..2f4f6d5e05b6 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -82,10 +82,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY help Lock the kernel's implementation of memcpy() into L2. -config ARCH_SPARSEMEM_ENABLE - def_bool y - select SPARSEMEM_STATIC - config IOMMU_HELPER bool -- cgit v1.2.3 From a586e14f2c4932fd8d4f14f713874f84f36f91c6 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 19 Jul 2012 09:11:15 +0200 Subject: MIPS: Fix typo multipy -> multiply Signed-off-by: Ralf Baechle --- arch/mips/include/asm/inst.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/inst.h b/arch/mips/include/asm/inst.h index 7ebfc392e58d..ab84064283db 100644 --- a/arch/mips/include/asm/inst.h +++ b/arch/mips/include/asm/inst.h @@ -251,7 +251,7 @@ struct f_format { /* FPU register format */ unsigned int func : 6; }; -struct ma_format { /* FPU multipy and add format (MIPS IV) */ +struct ma_format { /* FPU multiply and add format (MIPS IV) */ unsigned int opcode : 6; unsigned int fr : 5; unsigned int ft : 5; @@ -324,7 +324,7 @@ struct f_format { /* FPU register format */ unsigned int opcode : 6; }; -struct ma_format { /* FPU multipy and add format (MIPS IV) */ +struct ma_format { /* FPU multiply and add format (MIPS IV) */ unsigned int fmt : 2; unsigned int func : 4; unsigned int fd : 5; -- cgit v1.2.3 From 5d9fbed18e25c0ce4fe404550fdb46eaa54e52ce Mon Sep 17 00:00:00 2001 From: Leonid Yegoshin Date: Thu, 19 Jul 2012 09:11:15 +0200 Subject: MIPS: Malta may also be equipped with MIPS64 R2 processors. Signed-off-by: Leonid Yegoshin Signed-off-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3792/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 5eb8c932fe53..b3e10fdd3898 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -288,6 +288,7 @@ config MIPS_MALTA select SYS_HAS_CPU_MIPS32_R1 select SYS_HAS_CPU_MIPS32_R2 select SYS_HAS_CPU_MIPS64_R1 + select SYS_HAS_CPU_MIPS64_R2 select SYS_HAS_CPU_NEVADA select SYS_HAS_CPU_RM7000 select SYS_HAS_EARLY_PRINTK -- cgit v1.2.3 From 5520e426901ccb3e6683132e5b70425a811d7f78 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Thu, 19 Jul 2012 09:11:15 +0200 Subject: MIPS: cmpxchg.h: Add missing include Fix the following build breakage in v3.4-rc1: CC kernel/irq_work.o In file included from include/linux/irq_work.h:4:0, from kernel/irq_work.c:10: include/linux/llist.h: In function 'llist_del_all': include/linux/llist.h:178:2: error: implicit declaration of function 'BUILD_BUG_ON' [-Werror=implicit-function-declaration] Signed-off-by: Aaro Koskinen Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/3568/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cmpxchg.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 285a41fa0b18..eee10dc07ac1 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -8,6 +8,7 @@ #ifndef __ASM_CMPXCHG_H #define __ASM_CMPXCHG_H +#include #include #include -- cgit v1.2.3 From 4a043d79dc2d082b0df28820d43b96f1cdaf29e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 19 Jul 2012 09:11:16 +0200 Subject: mips: mark const init data with __initconst instead of __initdata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As long as there is no other non-const variable marked __initdata in the same compilation unit it doesn't hurt. If there were one however compilation would fail with error: $variablename causes a section type conflict because a section containing const variables is marked read only and so cannot contain non-const variables. Signed-off-by: Uwe Kleine-König Cc: linux-mips@linux-mips.org Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: kernel@pengutronix.de Patchwork: https://patchwork.linux-mips.org/patch/3565/ Signed-off-by: Ralf Baechle --- arch/mips/bcm63xx/dev-pcmcia.c | 4 ++-- arch/mips/mti-malta/malta-setup.c | 2 +- arch/mips/pci/fixup-mpc30x.c | 4 ++-- arch/mips/powertv/asic/asic-calliope.c | 2 +- arch/mips/powertv/asic/asic-cronus.c | 2 +- arch/mips/powertv/asic/asic-gaia.c | 2 +- arch/mips/powertv/asic/asic-zeus.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/mips/bcm63xx/dev-pcmcia.c b/arch/mips/bcm63xx/dev-pcmcia.c index de4d917fd54d..a551bab5ecb9 100644 --- a/arch/mips/bcm63xx/dev-pcmcia.c +++ b/arch/mips/bcm63xx/dev-pcmcia.c @@ -79,11 +79,11 @@ static int __init config_pcmcia_cs(unsigned int cs, return ret; } -static const __initdata struct { +static const struct { unsigned int cs; unsigned int base; unsigned int size; -} pcmcia_cs[3] = { +} pcmcia_cs[3] __initconst = { { .cs = MPI_CS_PCMCIA_COMMON, .base = BCM_PCMCIA_COMMON_BASE_PA, diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index b7f37d4982fa..2e28f653f66d 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -111,7 +111,7 @@ static void __init pci_clock_check(void) unsigned int __iomem *jmpr_p = (unsigned int *) ioremap(MALTA_JMPRS_REG, sizeof(unsigned int)); int jmpr = (__raw_readl(jmpr_p) >> 2) & 0x07; - static const int pciclocks[] __initdata = { + static const int pciclocks[] __initconst = { 33, 20, 25, 30, 12, 16, 37, 10 }; int pciclock = pciclocks[jmpr]; diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c index e08f49cb6875..8e4f8288eca2 100644 --- a/arch/mips/pci/fixup-mpc30x.c +++ b/arch/mips/pci/fixup-mpc30x.c @@ -22,13 +22,13 @@ #include -static const int internal_func_irqs[] __initdata = { +static const int internal_func_irqs[] __initconst = { VRC4173_CASCADE_IRQ, VRC4173_AC97_IRQ, VRC4173_USB_IRQ, }; -static const int irq_tab_mpc30x[] __initdata = { +static const int irq_tab_mpc30x[] __initconst = { [12] = VRC4173_PCMCIA1_IRQ, [13] = VRC4173_PCMCIA2_IRQ, [29] = MQ200_IRQ, diff --git a/arch/mips/powertv/asic/asic-calliope.c b/arch/mips/powertv/asic/asic-calliope.c index 0a170e0ffeaa..7773f3d956b0 100644 --- a/arch/mips/powertv/asic/asic-calliope.c +++ b/arch/mips/powertv/asic/asic-calliope.c @@ -28,7 +28,7 @@ #define CALLIOPE_ADDR(x) (CALLIOPE_IO_BASE + (x)) -const struct register_map calliope_register_map __initdata = { +const struct register_map calliope_register_map __initconst = { .eic_slow0_strt_add = {.phys = CALLIOPE_ADDR(0x800000)}, .eic_cfg_bits = {.phys = CALLIOPE_ADDR(0x800038)}, .eic_ready_status = {.phys = CALLIOPE_ADDR(0x80004c)}, diff --git a/arch/mips/powertv/asic/asic-cronus.c b/arch/mips/powertv/asic/asic-cronus.c index bbc0c122be5e..da076db7b7ed 100644 --- a/arch/mips/powertv/asic/asic-cronus.c +++ b/arch/mips/powertv/asic/asic-cronus.c @@ -28,7 +28,7 @@ #define CRONUS_ADDR(x) (CRONUS_IO_BASE + (x)) -const struct register_map cronus_register_map __initdata = { +const struct register_map cronus_register_map __initconst = { .eic_slow0_strt_add = {.phys = CRONUS_ADDR(0x000000)}, .eic_cfg_bits = {.phys = CRONUS_ADDR(0x000038)}, .eic_ready_status = {.phys = CRONUS_ADDR(0x00004C)}, diff --git a/arch/mips/powertv/asic/asic-gaia.c b/arch/mips/powertv/asic/asic-gaia.c index 91dda682752c..47683b370e74 100644 --- a/arch/mips/powertv/asic/asic-gaia.c +++ b/arch/mips/powertv/asic/asic-gaia.c @@ -23,7 +23,7 @@ #include #include -const struct register_map gaia_register_map __initdata = { +const struct register_map gaia_register_map __initconst = { .eic_slow0_strt_add = {.phys = GAIA_IO_BASE + 0x000000}, .eic_cfg_bits = {.phys = GAIA_IO_BASE + 0x000038}, .eic_ready_status = {.phys = GAIA_IO_BASE + 0x00004C}, diff --git a/arch/mips/powertv/asic/asic-zeus.c b/arch/mips/powertv/asic/asic-zeus.c index 4a05bb096476..6ff4b10f09da 100644 --- a/arch/mips/powertv/asic/asic-zeus.c +++ b/arch/mips/powertv/asic/asic-zeus.c @@ -28,7 +28,7 @@ #define ZEUS_ADDR(x) (ZEUS_IO_BASE + (x)) -const struct register_map zeus_register_map __initdata = { +const struct register_map zeus_register_map __initconst = { .eic_slow0_strt_add = {.phys = ZEUS_ADDR(0x000000)}, .eic_cfg_bits = {.phys = ZEUS_ADDR(0x000038)}, .eic_ready_status = {.phys = ZEUS_ADDR(0x00004c)}, -- cgit v1.2.3 From ca760ca5238c55cd0e29291c63e35ac6634d385f Mon Sep 17 00:00:00 2001 From: Danny Kukawka Date: Thu, 19 Jul 2012 09:11:16 +0200 Subject: MIPS: BMIPS: Fix duplicate header inclusion. Signed-off-by: Danny Kukawka Cc: Danny Kukawka Cc: Kevin Cernekee Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/3369/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp-bmips.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 3046e2986006..32fe9254d943 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From e909be825212da62433e805f03586015a04f3c78 Mon Sep 17 00:00:00 2001 From: Vincent Wen Date: Thu, 19 Jul 2012 09:11:16 +0200 Subject: MIPS: Fix Magic SysRq L kernel crash. show_backtrace() was passed a NULL pointer which caused paging request fail. Set to current task as other architectures (ARM, etc) do when passed a NULL task pointer. Signed-off-by: Vincent Wen Cc: linux-mips@linux-mips.org Cc: cernekee@gmail.com Patchwork: https://patchwork.linux-mips.org/patch/3524/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index ce95f2c41f3f..9716f057a77b 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -132,6 +132,9 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs) unsigned long ra = regs->regs[31]; unsigned long pc = regs->cp0_epc; + if (!task) + task = current; + if (raw_show_trace || !__kernel_text_address(pc)) { show_raw_backtrace(sp); return; -- cgit v1.2.3 From 6c37c9580409af7dc664bb6af0a85d540d63aeea Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 19 Jul 2012 09:13:52 +0200 Subject: MIPS: perf: Fix build error caused by unused counters_per_cpu_to_total() cc1: warnings being treated as errors arch/mips/kernel/perf_event_mipsxx.c:166: error: 'counters_per_cpu_to_total' defined but not used make[2]: *** [arch/mips/kernel/perf_event_mipsxx.o] Error 1 make[2]: *** Waiting for unfinished jobs.... It was first introduced by 82091564cfd7ab8def42777a9c662dbf655c5d25 [MIPS: perf: Add support for 64-bit perf counters.] in 3.2. Signed-off-by: Florian Fainelli Cc: linux-mips@linux-mips.org Cc: david.daney@cavium.com Patchwork: https://patchwork.linux-mips.org/patch/3357/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/perf_event_mipsxx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index f29099b104c4..eb5e394a4650 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -162,11 +162,6 @@ static unsigned int counters_total_to_per_cpu(unsigned int counters) return counters >> vpe_shift(); } -static unsigned int counters_per_cpu_to_total(unsigned int counters) -{ - return counters << vpe_shift(); -} - #else /* !CONFIG_MIPS_MT_SMP */ #define vpe_id() 0 -- cgit v1.2.3 From 38be3c7e77ee9bc8ca34a83506e34809ec36c2a4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 19 Jul 2012 09:13:52 +0200 Subject: MIPS: BCM63XX: Fix BCM6368 IPSec clock bit The IPsec clock bit is 18 and not 17. Signed-off-by: Florian Fainelli Cc: linux-mips@linux-mips.org Cc: mpm@selenic.com Cc: herbert@gondor.apana.org.au Patchwork: https://patchwork.linux-mips.org/patch/3323/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h index 94d4faad29a1..fdcd78ca1b03 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h @@ -99,7 +99,7 @@ #define CKCTL_6368_USBH_CLK_EN (1 << 15) #define CKCTL_6368_DISABLE_GLESS_EN (1 << 16) #define CKCTL_6368_NAND_CLK_EN (1 << 17) -#define CKCTL_6368_IPSEC_CLK_EN (1 << 17) +#define CKCTL_6368_IPSEC_CLK_EN (1 << 18) #define CKCTL_6368_ALL_SAFE_EN (CKCTL_6368_SWPKT_USB_EN | \ CKCTL_6368_SWPKT_SAR_EN | \ -- cgit v1.2.3 From 68b6352cdcdaa743b38ca5705601f65ef9bd662f Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 19 Jul 2012 09:13:52 +0200 Subject: MIPS: Oprofile: Fix build as a module. When building oprofile as a module for R10000 or R7000 class processors, E9000 or MIPSxx class cores since 3572a2c37f667ee49333f8863722b8f43eac506b [MIPS: make oprofile use cp0_perfcount_irq if it is set] an ERROR: "cp0_compare_irq" [arch/mips/oprofile/oprofile.ko] undefined! error will happen. Fixed by exporting cp0_compare_irq. Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 9716f057a77b..c3c293543703 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1502,6 +1502,7 @@ extern void flush_tlb_handlers(void); * Timer interrupt */ int cp0_compare_irq; +EXPORT_SYMBOL_GPL(cp0_compare_irq); int cp0_compare_irq_shift; /* -- cgit v1.2.3 From 1bcfecc028686ea32e49b0f4f6e8a665917cb49a Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Thu, 19 Jul 2012 09:13:53 +0200 Subject: MIPS: Octeon: delay enable irq to ->smp_finish() To prepare for smoothing set_cpu_[active|online]() mess up Signed-off-by: Yong Zhang Cc: Sergei Shtylyov Cc: David Daney Acked-by: David Daney Patchwork: https://patchwork.linux-mips.org/patch/3845/ Signed-off-by: Ralf Baechle --- arch/mips/cavium-octeon/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4b93048044eb..ee1fb9f7f517 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -185,7 +185,6 @@ static void __cpuinit octeon_init_secondary(void) octeon_init_cvmcount(); octeon_irq_setup_secondary(); - raw_local_irq_enable(); } /** @@ -233,6 +232,7 @@ static void octeon_smp_finish(void) /* to generate the first CPU timer interrupt */ write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ); + local_irq_enable(); } /** -- cgit v1.2.3 From 856ac3c6e0c4cb566014edf5fa185b962298db88 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Thu, 19 Jul 2012 09:13:53 +0200 Subject: MIPS: BMIPS: delay irq enable to ->smp_finish() To prepare for smoothing set_cpu_[active|online]() mess up Signed-off-by: Yong Zhang Cc: Sergei Shtylyov Cc: David Daney Acked-by: David Daney Patchwork: https://patchwork.linux-mips.org/patch/3846/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp-bmips.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 32fe9254d943..8e393b8443f7 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -196,13 +196,6 @@ static void bmips_init_secondary(void) write_c0_brcm_action(ACTION_CLR_IPI(smp_processor_id(), 0)); #endif - - /* make sure there won't be a timer interrupt for a little while */ - write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ); - - irq_enable_hazard(); - set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ1 | IE_IRQ5 | ST0_IE); - irq_enable_hazard(); } /* @@ -211,6 +204,13 @@ static void bmips_init_secondary(void) static void bmips_smp_finish(void) { pr_info("SMP: CPU%d is running\n", smp_processor_id()); + + /* make sure there won't be a timer interrupt for a little while */ + write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ); + + irq_enable_hazard(); + set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ1 | IE_IRQ5 | ST0_IE); + irq_enable_hazard(); } /* -- cgit v1.2.3 From 70dc8fa782efa2faa82ecec0f250223e1d773a47 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Thu, 19 Jul 2012 09:13:53 +0200 Subject: MIPS: SMTC: delay irq enable to ->smp_finish() To prepare for smoothing set_cpu_[active|online]() mess up Signed-off-by: Yong Zhang Cc: Sergei Shtylyov Cc: David Daney Acked-by: David Daney Patchwork: https://patchwork.linux-mips.org/patch/3847/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smtc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index b450ea529ddf..15b5f3cfd20c 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -615,7 +615,6 @@ void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle) void smtc_init_secondary(void) { - local_irq_enable(); } void smtc_smp_finish(void) @@ -631,6 +630,8 @@ void smtc_smp_finish(void) if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id)) write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); + local_irq_enable(); + printk("TC %d going on-line as CPU %d\n", cpu_data[smp_processor_id()].tc_id, smp_processor_id()); } -- cgit v1.2.3 From 263afbdd1cdd49338c118a3064acee01c69bb501 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Thu, 19 Jul 2012 09:13:53 +0200 Subject: MIPS: Yosemite: delay irq enable to ->smp_finish() To prepare for smoothing set_cpu_[active|online]() mess up Signed-off-by: Yong Zhang Cc: Sergei Shtylyov Cc: David Daney Acked-by: David Daney Patchwork: https://patchwork.linux-mips.org/patch/3848/ Signed-off-by: Ralf Baechle --- arch/mips/pmc-sierra/yosemite/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index b71fae231049..5edab2bc6fc0 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -115,11 +115,11 @@ static void yos_send_ipi_mask(const struct cpumask *mask, unsigned int action) */ static void __cpuinit yos_init_secondary(void) { - set_c0_status(ST0_CO | ST0_IE | ST0_IM); } static void __cpuinit yos_smp_finish(void) { + set_c0_status(ST0_CO | ST0_IM | ST0_IE); } /* Hook for after all CPUs are online */ -- cgit v1.2.3 From 5309bdac7029c7d8659d6653761f3402e17ed1ab Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Thu, 19 Jul 2012 09:13:53 +0200 Subject: MIPS: call ->smp_finish() a little late We have move irq enable to ->smp_finish. Place ->smp_finish() a little late to prepare for move set_cpu_online() into start_secondary. And it's not necessary to call cpu_set(cpu, cpu_callin_map) and synchronise_count_slave() with irq enabled. Signed-off-by: Yong Zhang Cc: Sergei Shtylyov Cc: David Daney Acked-by: David Daney Patchwork: https://patchwork.linux-mips.org/patch/3850/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 48650c818040..b181bbf410de 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -122,13 +122,14 @@ asmlinkage __cpuinit void start_secondary(void) notify_cpu_starting(cpu); - mp_ops->smp_finish(); set_cpu_sibling_map(cpu); cpu_set(cpu, cpu_callin_map); synchronise_count_slave(); + mp_ops->smp_finish(); + cpu_idle(); } -- cgit v1.2.3 From b9a09a0660aa9174e489ac531244680971950ef8 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Thu, 19 Jul 2012 09:13:53 +0200 Subject: MIPS: call set_cpu_online() on cpu being brought up with irq disabled To prevent a problem as commit 5fbd036b [sched: Cleanup cpu_active madness] and commit 2baab4e9 [sched: Fix select_fallback_rq() vs cpu_active/cpu_online] try to resolve, move set_cpu_online() to the brought up CPU and with irq disabled. Signed-off-by: Yong Zhang Cc: Sergei Shtylyov Cc: David Daney Acked-by: David Daney Patchwork: https://patchwork.linux-mips.org/patch/3851/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index b181bbf410de..eb3e2b112a0d 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -122,6 +122,8 @@ asmlinkage __cpuinit void start_secondary(void) notify_cpu_starting(cpu); + set_cpu_online(cpu, true); + set_cpu_sibling_map(cpu); cpu_set(cpu, cpu_callin_map); @@ -197,8 +199,6 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) while (!cpu_isset(cpu, cpu_callin_map)) udelay(100); - set_cpu_online(cpu, true); - return 0; } -- cgit v1.2.3 From b789ad63ac46b00f0862bdc23fc95556adc3cf2f Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Thu, 19 Jul 2012 09:13:53 +0200 Subject: MIPS: smp: Warn on too early irq enable Just to catch a potential issue. Signed-off-by: Yong Zhang Cc: Sergei Shtylyov Cc: David Daney Acked-by: David Daney Patchwork: https://patchwork.linux-mips.org/patch/3852/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index eb3e2b112a0d..1268392f1d27 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -130,6 +130,11 @@ asmlinkage __cpuinit void start_secondary(void) synchronise_count_slave(); + /* + * irq will be enabled in ->smp_finish(), enabling it too early + * is dangerous. + */ + WARN_ON_ONCE(!irqs_disabled()); mp_ops->smp_finish(); cpu_idle(); -- cgit v1.2.3 From f2b88d65aa7adaa3996088e86ae497fe705e96f3 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Thu, 19 Jul 2012 09:13:54 +0200 Subject: MIPS: sync-r4k: remove redundant irq operation Since we have delayed irq enabling to ->smp_finish() Signed-off-by: Yong Zhang Cc: Sergei Shtylyov Cc: David Daney Acked-by: David Daney Signed-off-by: Ralf Baechle --- arch/mips/kernel/sync-r4k.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c index 99f913c8d7a6..842d55e411fd 100644 --- a/arch/mips/kernel/sync-r4k.c +++ b/arch/mips/kernel/sync-r4k.c @@ -111,7 +111,6 @@ void __cpuinit synchronise_count_master(void) void __cpuinit synchronise_count_slave(void) { int i; - unsigned long flags; unsigned int initcount; int ncpus; @@ -123,8 +122,6 @@ void __cpuinit synchronise_count_slave(void) return; #endif - local_irq_save(flags); - /* * Not every cpu is online at the time this gets called, * so we first wait for the master to say everyone is ready @@ -154,7 +151,5 @@ void __cpuinit synchronise_count_slave(void) } /* Arrange for an interrupt in a short while */ write_c0_compare(read_c0_count() + COUNTON); - - local_irq_restore(flags); } #undef NR_LOOPS -- cgit v1.2.3 From 3592c3cd061fc717b90126de31912110fe0cae3c Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Thu, 19 Jul 2012 07:13:54 +0200 Subject: MIPS: Fix bug.h MIPS build regression Commit: 3777808873b0c49c5cf27e44c948dfb02675d578 [bug.h: need linux/kernel.h for TAINT_WARN.] breaks all MIPS builds. CC arch/mips/kernel/machine_kexec.o In file included from include/linux/kernel.h:20:0, from include/asm-generic/bug.h:35, from /home/yuasa/src/linux/kernel/git/linux-2.6/arch/mips/include/asm/bug.h:41, from /home/yuasa/src/linux/kernel/git/linux-2.6/arch/mips/include/asm/bitops.h:20, from include/linux/bitops.h:22, from include/linux/signal.h:38, from include/linux/elfcore.h:5, from include/linux/kexec.h:60, from arch/mips/kernel/machine_kexec.c:9: include/linux/log2.h: In function '__ilog2_u32': include/linux/log2.h:34:2: error: implicit declaration of function 'fls' [-Werror=implicit-function-declaration] include/linux/log2.h: In function '__ilog2_u64': include/linux/log2.h:42:2: error: implicit declaration of function 'fls64' [-Werror=implicit-function-declaration] include/linux/log2.h: In function '__roundup_pow_of_two': include/linux/log2.h:63:2: error: implicit declaration of function 'fls_long' [-Werror=implicit-function-declaration] In file included from include/linux/bitops.h:22:0, from include/linux/signal.h:38, from include/linux/elfcore.h:5, from include/linux/kexec.h:60, from arch/mips/kernel/machine_kexec.c:9: /home/yuasa/src/linux/kernel/git/linux-2.6/arch/mips/include/asm/bitops.h: At top level: /home/yuasa/src/linux/kernel/git/linux-2.6/arch/mips/include/asm/bitops.h:615:19: error: static declaration of 'fls' follows non-static declaration include/linux/log2.h:34:9: note: previous implicit declaration of 'fls' was here In file included from /home/yuasa/src/linux/kernel/git/linux-2.6/arch/mips/include/asm/bitops.h:651:0, from include/linux/bitops.h:22, from include/linux/signal.h:38, from include/linux/elfcore.h:5, from include/linux/kexec.h:60, from arch/mips/kernel/machine_kexec.c:9: include/asm-generic/bitops/fls64.h:18:28: error: static declaration of 'fls64' follows non-static declaration include/linux/log2.h:42:9: note: previous implicit declaration of 'fls64' was here In file included from include/linux/signal.h:38:0, from include/linux/elfcore.h:5, from include/linux/kexec.h:60, from arch/mips/kernel/machine_kexec.c:9: include/linux/bitops.h:160:24: error: conflicting types for 'fls_long' include/linux/log2.h:63:16: note: previous implicit declaration of 'fls_long' was here cc1: all warnings being treated as errors make[2]: *** [arch/mips/kernel/machine_kexec.o] Error 1 Signed-off-by: Yoichi Yuasa Cc: Geert Uytterhoeven Cc: Paul Mundt Cc: yuasa@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: Linuxppc-dev Cc: Linux MIPS Mailing List Cc: Linux-sh list Cc: Chris Zankel Patchwork: https://patchwork.linux-mips.org/patch/4000/ Tested-by: John Crispin Signed-off-by: Ralf Baechle --- arch/mips/include/asm/bitops.h | 1 - arch/mips/include/asm/io.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 2e1ad4c652b7..82ad35ce2b45 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -17,7 +17,6 @@ #include #include #include -#include #include /* sigh ... */ #include #include diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index a58f22998a86..29d9c23c20c7 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -17,6 +17,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 85a053fa5f2d67ae5b2968305b16e8d2fe4cdf4d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 19 Jul 2012 07:13:54 +0200 Subject: MIPS: PCI: Move fixups from __init to __devinit. Fixups are executed once the pci-device is found which is during boot process so __init seems fine as long as the platform does not support hotplug. However it is possible to remove the PCI bus at run time and have it rediscovered again via "echo 1 > /sys/bus/pci/rescan" and this will call the fixups again. [ralf@linux-mips.org: Made piixirqmap[] in malta_piix_func0_fixup() __initdata.] Signed-off-by: Sebastian Andrzej Siewior Cc: linux-mips@linux-mips.org Signed-off-by: Ralf Baechle --- arch/mips/mti-malta/malta-pci.c | 2 +- arch/mips/pci/fixup-fuloong2e.c | 12 ++++++------ arch/mips/pci/fixup-lemote2f.c | 12 ++++++------ arch/mips/pci/fixup-malta.c | 6 +++--- arch/mips/pci/fixup-sb1250.c | 6 +++--- arch/mips/pci/ops-tx4927.c | 2 +- arch/mips/pci/pci-ip27.c | 2 +- arch/mips/txx9/generic/pci.c | 2 +- 8 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/mips/mti-malta/malta-pci.c b/arch/mips/mti-malta/malta-pci.c index 916206823d45..284dea54faf5 100644 --- a/arch/mips/mti-malta/malta-pci.c +++ b/arch/mips/mti-malta/malta-pci.c @@ -254,7 +254,7 @@ void __init mips_pcibios_init(void) } /* Enable PCI 2.1 compatibility in PIIX4 */ -static void __init quirk_dlcsetup(struct pci_dev *dev) +static void __devinit quirk_dlcsetup(struct pci_dev *dev) { u8 odlc, ndlc; (void) pci_read_config_byte(dev, 0x82, &odlc); diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c index d5d4c018fb04..0857ab8c3919 100644 --- a/arch/mips/pci/fixup-fuloong2e.c +++ b/arch/mips/pci/fixup-fuloong2e.c @@ -48,7 +48,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev) return 0; } -static void __init loongson2e_nec_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_nec_fixup(struct pci_dev *pdev) { unsigned int val; @@ -60,7 +60,7 @@ static void __init loongson2e_nec_fixup(struct pci_dev *pdev) pci_write_config_dword(pdev, 0xe4, 1 << 5); } -static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_686b_func0_fixup(struct pci_dev *pdev) { unsigned char c; @@ -135,7 +135,7 @@ static void __init loongson2e_686b_func0_fixup(struct pci_dev *pdev) printk(KERN_INFO"via686b fix: ISA bridge done\n"); } -static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_686b_func1_fixup(struct pci_dev *pdev) { printk(KERN_INFO"via686b fix: IDE\n"); @@ -168,19 +168,19 @@ static void __init loongson2e_686b_func1_fixup(struct pci_dev *pdev) printk(KERN_INFO"via686b fix: IDE done\n"); } -static void __init loongson2e_686b_func2_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_686b_func2_fixup(struct pci_dev *pdev) { /* irq routing */ pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 10); } -static void __init loongson2e_686b_func3_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_686b_func3_fixup(struct pci_dev *pdev) { /* irq routing */ pci_write_config_byte(pdev, PCI_INTERRUPT_LINE, 11); } -static void __init loongson2e_686b_func5_fixup(struct pci_dev *pdev) +static void __devinit loongson2e_686b_func5_fixup(struct pci_dev *pdev) { unsigned int val; unsigned char c; diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c index 4b9768d5d729..a7b917dcf604 100644 --- a/arch/mips/pci/fixup-lemote2f.c +++ b/arch/mips/pci/fixup-lemote2f.c @@ -96,21 +96,21 @@ int pcibios_plat_dev_init(struct pci_dev *dev) } /* CS5536 SPEC. fixup */ -static void __init loongson_cs5536_isa_fixup(struct pci_dev *pdev) +static void __devinit loongson_cs5536_isa_fixup(struct pci_dev *pdev) { /* the uart1 and uart2 interrupt in PIC is enabled as default */ pci_write_config_dword(pdev, PCI_UART1_INT_REG, 1); pci_write_config_dword(pdev, PCI_UART2_INT_REG, 1); } -static void __init loongson_cs5536_ide_fixup(struct pci_dev *pdev) +static void __devinit loongson_cs5536_ide_fixup(struct pci_dev *pdev) { /* setting the mutex pin as IDE function */ pci_write_config_dword(pdev, PCI_IDE_CFG_REG, CS5536_IDE_FLASH_SIGNATURE); } -static void __init loongson_cs5536_acc_fixup(struct pci_dev *pdev) +static void __devinit loongson_cs5536_acc_fixup(struct pci_dev *pdev) { /* enable the AUDIO interrupt in PIC */ pci_write_config_dword(pdev, PCI_ACC_INT_REG, 1); @@ -118,14 +118,14 @@ static void __init loongson_cs5536_acc_fixup(struct pci_dev *pdev) pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0xc0); } -static void __init loongson_cs5536_ohci_fixup(struct pci_dev *pdev) +static void __devinit loongson_cs5536_ohci_fixup(struct pci_dev *pdev) { /* enable the OHCI interrupt in PIC */ /* THE OHCI, EHCI, UDC, OTG are shared with interrupt in PIC */ pci_write_config_dword(pdev, PCI_OHCI_INT_REG, 1); } -static void __init loongson_cs5536_ehci_fixup(struct pci_dev *pdev) +static void __devinit loongson_cs5536_ehci_fixup(struct pci_dev *pdev) { u32 hi, lo; @@ -137,7 +137,7 @@ static void __init loongson_cs5536_ehci_fixup(struct pci_dev *pdev) pci_write_config_dword(pdev, PCI_EHCI_FLADJ_REG, 0x2000); } -static void __init loongson_nec_fixup(struct pci_dev *pdev) +static void __devinit loongson_nec_fixup(struct pci_dev *pdev) { unsigned int val; diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c index 0f48498bc231..70073c98ed32 100644 --- a/arch/mips/pci/fixup-malta.c +++ b/arch/mips/pci/fixup-malta.c @@ -49,10 +49,10 @@ int pcibios_plat_dev_init(struct pci_dev *dev) return 0; } -static void __init malta_piix_func0_fixup(struct pci_dev *pdev) +static void __devinit malta_piix_func0_fixup(struct pci_dev *pdev) { unsigned char reg_val; - static int piixirqmap[16] __initdata = { /* PIIX PIRQC[A:D] irq mappings */ + static int piixirqmap[16] __devinitdata = { /* PIIX PIRQC[A:D] irq mappings */ 0, 0, 0, 3, 4, 5, 6, 7, 0, 9, 10, 11, @@ -83,7 +83,7 @@ static void __init malta_piix_func0_fixup(struct pci_dev *pdev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0, malta_piix_func0_fixup); -static void __init malta_piix_func1_fixup(struct pci_dev *pdev) +static void __devinit malta_piix_func1_fixup(struct pci_dev *pdev) { unsigned char reg_val; diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c index f0bb9146e6c0..d02900a72916 100644 --- a/arch/mips/pci/fixup-sb1250.c +++ b/arch/mips/pci/fixup-sb1250.c @@ -15,7 +15,7 @@ * Set the BCM1250, etc. PCI host bridge's TRDY timeout * to the finite max. */ -static void __init quirk_sb1250_pci(struct pci_dev *dev) +static void __devinit quirk_sb1250_pci(struct pci_dev *dev) { pci_write_config_byte(dev, 0x40, 0xff); } @@ -25,7 +25,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI, /* * The BCM1250, etc. PCI/HT bridge reports as a host bridge. */ -static void __init quirk_sb1250_ht(struct pci_dev *dev) +static void __devinit quirk_sb1250_ht(struct pci_dev *dev) { dev->class = PCI_CLASS_BRIDGE_PCI << 8; } @@ -35,7 +35,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_HT, /* * Set the SP1011 HT/PCI bridge's TRDY timeout to the finite max. */ -static void __init quirk_sp1011(struct pci_dev *dev) +static void __devinit quirk_sp1011(struct pci_dev *dev) { pci_write_config_byte(dev, 0x64, 0xff); } diff --git a/arch/mips/pci/ops-tx4927.c b/arch/mips/pci/ops-tx4927.c index a1e7e6d80c8c..bc13e29d2bb3 100644 --- a/arch/mips/pci/ops-tx4927.c +++ b/arch/mips/pci/ops-tx4927.c @@ -495,7 +495,7 @@ irqreturn_t tx4927_pcierr_interrupt(int irq, void *dev_id) } #ifdef CONFIG_TOSHIBA_FPCIB0 -static void __init tx4927_quirk_slc90e66_bridge(struct pci_dev *dev) +static void __devinit tx4927_quirk_slc90e66_bridge(struct pci_dev *dev) { struct tx4927_pcic_reg __iomem *pcicptr = pci_bus_to_pcicptr(dev->bus); diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c index 0fbe4c0c170a..fdc24440294c 100644 --- a/arch/mips/pci/pci-ip27.c +++ b/arch/mips/pci/pci-ip27.c @@ -212,7 +212,7 @@ static inline void pci_enable_swapping(struct pci_dev *dev) bridge->b_widget.w_tflush; /* Flush */ } -static void __init pci_fixup_ioc3(struct pci_dev *d) +static void __devinit pci_fixup_ioc3(struct pci_dev *d) { pci_disable_swapping(d); } diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c index 682efb0c108d..64eb71b15280 100644 --- a/arch/mips/txx9/generic/pci.c +++ b/arch/mips/txx9/generic/pci.c @@ -269,7 +269,7 @@ txx9_i8259_irq_setup(int irq) return err; } -static void __init quirk_slc90e66_bridge(struct pci_dev *dev) +static void __devinit quirk_slc90e66_bridge(struct pci_dev *dev) { int irq; /* PCI/ISA Bridge interrupt */ u8 reg_64; -- cgit v1.2.3 From e9a09aed3eff8b1706e4bc24e8b3b16283797353 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 19 Jun 2012 14:39:54 +0200 Subject: HID: hid-multitouch: add support for Zytronic panels Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-multitouch.c | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index bef04c192768..3fda8c87f02c 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -386,6 +386,7 @@ config HID_MULTITOUCH - Unitec Panels - XAT optical touch panels - Xiroku optical touch panels + - Zytronic touch panels If unsure, say N. diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index fc0c68e4b9ed..5e4168e7ed1c 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -805,6 +805,9 @@ #define USB_VENDOR_ID_ZYDACRON 0x13EC #define USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL 0x0006 +#define USB_VENDOR_ID_ZYTRONIC 0x14c8 +#define USB_DEVICE_ID_ZYTRONIC_ZXY100 0x0005 + #define USB_VENDOR_ID_PRIMAX 0x0461 #define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05 diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 6e3332a99976..76479246d4ee 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1048,6 +1048,11 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR2) }, + /* Zytronic panels */ + { .driver_data = MT_CLS_SERIAL, + MT_USB_DEVICE(USB_VENDOR_ID_ZYTRONIC, + USB_DEVICE_ID_ZYTRONIC_ZXY100) }, + /* Generic MT device */ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) }, { } -- cgit v1.2.3 From 380e99fc44d79bc35af9ff1d3316ef4027ce775e Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 18 Jul 2012 10:05:26 -0300 Subject: cx25821: Remove bad strcpy to read-only char* The strcpy was being used to set the name of the board. Since the destination char* was read-only and the name is set statically at compile time; this was both wrong and redundant. The type of char* is changed to const char* to prevent future errors. Reported-by: Radek Masin Signed-off-by: Ezequiel Garcia [ Taking directly due to vacations - Linus ] Signed-off-by: Linus Torvalds --- drivers/media/video/cx25821/cx25821-core.c | 3 --- drivers/media/video/cx25821/cx25821.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/media/video/cx25821/cx25821-core.c b/drivers/media/video/cx25821/cx25821-core.c index 83c1aa6b2e6c..f11f6f07e915 100644 --- a/drivers/media/video/cx25821/cx25821-core.c +++ b/drivers/media/video/cx25821/cx25821-core.c @@ -904,9 +904,6 @@ static int cx25821_dev_setup(struct cx25821_dev *dev) list_add_tail(&dev->devlist, &cx25821_devlist); mutex_unlock(&cx25821_devlist_mutex); - strcpy(cx25821_boards[UNKNOWN_BOARD].name, "unknown"); - strcpy(cx25821_boards[CX25821_BOARD].name, "cx25821"); - if (dev->pci->device != 0x8210) { pr_info("%s(): Exiting. Incorrect Hardware device = 0x%02x\n", __func__, dev->pci->device); diff --git a/drivers/media/video/cx25821/cx25821.h b/drivers/media/video/cx25821/cx25821.h index b9aa801b00a7..029f2934a6d8 100644 --- a/drivers/media/video/cx25821/cx25821.h +++ b/drivers/media/video/cx25821/cx25821.h @@ -187,7 +187,7 @@ enum port { }; struct cx25821_board { - char *name; + const char *name; enum port porta; enum port portb; enum port portc; -- cgit v1.2.3 From c6727932cfdb13501108b16c38463c09d5ec7a74 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 14 Jul 2012 14:33:09 +0300 Subject: UBIFS: fix a bug in empty space fix-up UBIFS has a feature called "empty space fix-up" which is a quirk to work-around limitations of dumb flasher programs. Namely, of those flashers that are unable to skip NAND pages full of 0xFFs while flashing, resulting in empty space at the end of half-filled eraseblocks to be unusable for UBIFS. This feature is relatively new (introduced in v3.0). The fix-up routine (fixup_free_space()) is executed only once at the very first mount if the superblock has the 'space_fixup' flag set (can be done with -F option of mkfs.ubifs). It basically reads all the UBIFS data and metadata and writes it back to the same LEB. The routine assumes the image is pristine and does not have anything in the journal. There was a bug in 'fixup_free_space()' where it fixed up the log incorrectly. All but one LEB of the log of a pristine file-system are empty. And one contains just a commit start node. And 'fixup_free_space()' just unmapped this LEB, which resulted in wiping the commit start node. As a result, some users were unable to mount the file-system next time with the following symptom: UBIFS error (pid 1): replay_log_leb: first log node at LEB 3:0 is not CS node UBIFS error (pid 1): replay_log_leb: log error detected while replaying the log at LEB 3:0 The root-cause of this bug was that 'fixup_free_space()' wrongly assumed that the beginning of empty space in the log head (c->lhead_offs) was known on mount. However, it is not the case - it was always 0. UBIFS does not store in it the master node and finds out by scanning the log on every mount. The fix is simple - just pass commit start node size instead of 0 to 'fixup_leb()'. Signed-off-by: Artem Bityutskiy Cc: stable@vger.kernel.org [v3.0+] Reported-by: Iwo Mergler Tested-by: Iwo Mergler Reported-by: James Nute --- fs/ubifs/sb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index ef3d1ba6d992..15e2fc5aa60b 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -718,8 +718,12 @@ static int fixup_free_space(struct ubifs_info *c) lnum = ubifs_next_log_lnum(c, lnum); } - /* Fixup the current log head */ - err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); + /* + * Fixup the log head which contains the only a CS node at the + * beginning. + */ + err = fixup_leb(c, c->lhead_lnum, + ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size)); if (err) goto out; -- cgit v1.2.3 From 9ff19309a9623f2963ac5a136782ea4d8b5d67fb Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 8 Jun 2012 01:19:07 +0300 Subject: ore: Fix NFS crash by supporting any unaligned RAID IO In RAID_5/6 We used to not permit an IO that it's end byte is not stripe_size aligned and spans more than one stripe. .i.e the caller must check if after submission the actual transferred bytes is shorter, and would need to resubmit a new IO with the remainder. Exofs supports this, and NFS was supposed to support this as well with it's short write mechanism. But late testing has exposed a CRASH when this is used with none-RPC layout-drivers. The change at NFS is deep and risky, in it's place the fix at ORE to lift the limitation is actually clean and simple. So here it is below. The principal here is that in the case of unaligned IO on both ends, beginning and end, we will send two read requests one like old code, before the calculation of the first stripe, and also a new site, before the calculation of the last stripe. If any "boundary" is aligned or the complete IO is within a single stripe. we do a single read like before. The code is clean and simple by splitting the old _read_4_write into 3 even parts: 1._read_4_write_first_stripe 2. _read_4_write_last_stripe 3. _read_4_write_execute And calling 1+3 at the same place as before. 2+3 before last stripe, and in the case of all in a single stripe then 1+2+3 is preformed additively. Why did I not think of it before. Well I had a strike of genius because I have stared at this code for 2 years, and did not find this simple solution, til today. Not that I did not try. This solution is much better for NFS than the previous supposedly solution because the short write was dealt with out-of-band after IO_done, which would cause for a seeky IO pattern where as in here we execute in order. At both solutions we do 2 separate reads, only here we do it within a single IO request. (And actually combine two writes into a single submission) NFS/exofs code need not change since the ORE API communicates the new shorter length on return, what will happen is that this case would not occur anymore. hurray!! [Stable this is an NFS bug since 3.2 Kernel should apply cleanly] CC: Stable Tree Signed-off-by: Boaz Harrosh --- fs/exofs/ore_raid.c | 67 ++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index d222c77cfa1b..fff2070c6751 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) * ios->sp2d[p][*], xor is calculated the same way. These pages are * allocated/freed and don't go through cache */ -static int _read_4_write(struct ore_io_state *ios) +static int _read_4_write_first_stripe(struct ore_io_state *ios) { - struct ore_io_state *ios_read; struct ore_striping_info read_si; struct __stripe_pages_2d *sp2d = ios->sp2d; u64 offset = ios->si.first_stripe_start; - u64 last_stripe_end; - unsigned bytes_in_stripe = ios->si.bytes_in_stripe; - unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1; - int ret; + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; if (offset == ios->offset) /* Go to start collect $200 */ goto read_last_stripe; @@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios) min_p = _sp2d_min_pg(sp2d); max_p = _sp2d_max_pg(sp2d); + ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n", + offset, ios->offset, min_p, max_p); + for (c = 0; ; c++) { ore_calc_stripe_info(ios->layout, offset, 0, &read_si); read_si.obj_offset += min_p * PAGE_SIZE; @@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios) } read_last_stripe: + return 0; +} + +static int _read_4_write_last_stripe(struct ore_io_state *ios) +{ + struct ore_striping_info read_si; + struct __stripe_pages_2d *sp2d = ios->sp2d; + u64 offset; + u64 last_stripe_end; + unsigned bytes_in_stripe = ios->si.bytes_in_stripe; + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; + offset = ios->offset + ios->length; if (offset % PAGE_SIZE) _add_to_r4w_last_page(ios, &offset); @@ -527,15 +538,15 @@ read_last_stripe: c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); - BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end); - /* unaligned IO must be within a single stripe */ - if (min_p == sp2d->pages_in_unit) { /* Didn't do it yet */ min_p = _sp2d_min_pg(sp2d); max_p = _sp2d_max_pg(sp2d); } + ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n", + offset, last_stripe_end, min_p, max_p); + while (offset < last_stripe_end) { struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; @@ -568,6 +579,15 @@ read_last_stripe: } read_it: + return 0; +} + +static int _read_4_write_execute(struct ore_io_state *ios) +{ + struct ore_io_state *ios_read; + unsigned i; + int ret; + ios_read = ios->ios_read_4_write; if (!ios_read) return 0; @@ -591,6 +611,8 @@ read_it: } _mark_read4write_pages_uptodate(ios_read, ret); + ore_put_io_state(ios_read); + ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */ return 0; } @@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, /* If first stripe, Read in all read4write pages * (if needed) before we calculate the first parity. */ - _read_4_write(ios); + _read_4_write_first_stripe(ios); } + if (!cur_len) /* If last stripe r4w pages of last stripe */ + _read_4_write_last_stripe(ios); + _read_4_write_execute(ios); for (i = 0; i < num_pages; i++) { pages[i] = _raid_page_alloc(); @@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios, int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) { - struct ore_layout *layout = ios->layout; - if (ios->parity_pages) { + struct ore_layout *layout = ios->layout; unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; - unsigned stripe_size = ios->si.bytes_in_stripe; - u64 last_stripe, first_stripe; if (_sp2d_alloc(pages_in_unit, layout->group_width, layout->parity, &ios->sp2d)) { return -ENOMEM; } - - /* Round io down to last full strip */ - first_stripe = div_u64(ios->offset, stripe_size); - last_stripe = div_u64(ios->offset + ios->length, stripe_size); - - /* If an IO spans more then a single stripe it must end at - * a stripe boundary. The reminder at the end is pushed into the - * next IO. - */ - if (last_stripe != first_stripe) { - ios->length = last_stripe * stripe_size - ios->offset; - - BUG_ON(!ios->length); - ios->nr_pages = (ios->length + PAGE_SIZE - 1) / - PAGE_SIZE; - ios->si.length = ios->length; /*make it consistent */ - } } return 0; } -- cgit v1.2.3 From 62b62ad873f2accad9222a4d7ffbe1e93f6714c1 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 8 Jun 2012 04:30:40 +0300 Subject: ore: Remove support of partial IO request (NFS crash) Do to OOM situations the ore might fail to allocate all resources needed for IO of the full request. If some progress was possible it would proceed with a partial/short request, for the sake of forward progress. Since this crashes NFS-core and exofs is just fine without it just remove this contraption, and fail. TODO: Support real forward progress with some reserved allocations of resources, such as mem pools and/or bio_sets [Bug since 3.2 Kernel] CC: Stable Tree CC: Benny Halevy Signed-off-by: Boaz Harrosh --- fs/exofs/ore.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 49cf230554a2..24a49d47e935 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios) out: ios->numdevs = devs_in_group; ios->pages_consumed = cur_pg; - if (unlikely(ret)) { - if (length == ios->length) - return ret; - else - ios->length -= length; - } - return 0; + return ret; } int ore_create(struct ore_io_state *ios) -- cgit v1.2.3 From 537632e0a54a5355cdd0330911d18c3b773f9cf7 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 11 Jul 2012 15:27:13 +0300 Subject: ore: Unlock r4w pages in exact reverse order of locking The read-4-write pages are locked in address ascending order. But where unlocked in a way easiest for coding. Fix that, locks should be released in opposite order of locking, .i.e descending address order. I have not hit this dead-lock. It was found by inspecting the dbug print-outs. I suspect there is an higher lock at caller that protects us, but fix it regardless. Signed-off-by: Boaz Harrosh --- fs/exofs/ore_raid.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index fff2070c6751..5f376d14fdcc 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d, { unsigned data_devs = sp2d->data_devs; unsigned group_width = data_devs + sp2d->parity; - unsigned p; + int p, c; if (!sp2d->needed) return; - for (p = 0; p < sp2d->pages_in_unit; p++) { - struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; - - if (_1ps->write_count < group_width) { - unsigned c; + for (c = data_devs - 1; c >= 0; --c) + for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { + struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; - for (c = 0; c < data_devs; c++) - if (_1ps->page_is_read[c]) { - struct page *page = _1ps->pages[c]; + if (_1ps->page_is_read[c]) { + struct page *page = _1ps->pages[c]; - r4w->put_page(priv, page); - _1ps->page_is_read[c] = false; - } + r4w->put_page(priv, page); + _1ps->page_is_read[c] = false; + } } + for (p = 0; p < sp2d->pages_in_unit; p++) { + struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; + memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages)); _1ps->write_count = 0; _1ps->tx = NULL; -- cgit v1.2.3 From 9909d45a8557455ca5f8ee7af0f253debc851f1a Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 8 Jun 2012 05:29:40 +0300 Subject: pnfs-obj: don't leak objio_state if ore_write/read fails [Bug since 3.2 Kernel] CC: Stable Tree Signed-off-by: Boaz Harrosh --- fs/nfs/objlayout/objio_osd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index b47277baebab..86d7595aca8f 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata) objios->ios->done = _read_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, rdata->args.offset, rdata->args.count); - return ore_read(objios->ios); + ret = ore_read(objios->ios); + if (unlikely(ret)) + objio_free_result(&objios->oir); + return ret; } /* @@ -539,8 +542,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how) dprintk("%s: offset=0x%llx length=0x%x\n", __func__, wdata->args.offset, wdata->args.count); ret = ore_write(objios->ios); - if (unlikely(ret)) + if (unlikely(ret)) { + objio_free_result(&objios->oir); return ret; + } if (objios->sync) _write_done(objios->ios, objios); -- cgit v1.2.3 From c999ff68029ebd0f56ccae75444f640f6d5a27d2 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 8 Jun 2012 02:02:30 +0300 Subject: pnfs-obj: Fix __r4w_get_page when offset is beyond i_size It is very common for the end of the file to be unaligned on stripe size. But since we know it's beyond file's end then the XOR should be preformed with all zeros. Old code used to just read zeros out of the OSD devices, which is a great waist. But what scares me more about this situation is that, we now have pages attached to the file's mapping that are beyond i_size. I don't like the kind of bugs this calls for. Fix both birds, by returning a global zero_page, if offset is beyond i_size. TODO: Change the API to ->__r4w_get_page() so a NULL can be returned without being considered as error, since XOR API treats NULL entries as zero_pages. [Bug since 3.2. Should apply the same way to all Kernels since] CC: Stable Tree Signed-off-by: Boaz Harrosh --- fs/nfs/objlayout/objio_osd.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 86d7595aca8f..f50d3e8d6f22 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -489,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) struct nfs_write_data *wdata = objios->oir.rpcdata; struct address_space *mapping = wdata->header->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; - struct page *page = find_get_page(mapping, index); + struct page *page; + loff_t i_size = i_size_read(wdata->header->inode); + if (offset >= i_size) { + *uptodate = true; + dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); + return ZERO_PAGE(0); + } + + page = find_get_page(mapping, index); if (!page) { page = find_or_create_page(mapping, index, GFP_NOFS); if (unlikely(!page)) { @@ -510,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) static void __r4w_put_page(void *priv, struct page *page) { - dprintk("%s: index=0x%lx\n", __func__, page->index); - page_cache_release(page); + dprintk("%s: index=0x%lx\n", __func__, + (page == ZERO_PAGE(0)) ? -1UL : page->index); + if (ZERO_PAGE(0) != page) + page_cache_release(page); return; } -- cgit v1.2.3 From 751f188dd5ab95b3f2b5f2f467c38aae5a2877eb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 20 Jul 2012 14:25:03 +0100 Subject: dm raid1: fix crash with mirror recovery and discard This patch fixes a crash when a discard request is sent during mirror recovery. Firstly, some background. Generally, the following sequence happens during mirror synchronization: - function do_recovery is called - do_recovery calls dm_rh_recovery_prepare - dm_rh_recovery_prepare uses a semaphore to limit the number simultaneously recovered regions (by default the semaphore value is 1, so only one region at a time is recovered) - dm_rh_recovery_prepare calls __rh_recovery_prepare, __rh_recovery_prepare asks the log driver for the next region to recover. Then, it sets the region state to DM_RH_RECOVERING. If there are no pending I/Os on this region, the region is added to quiesced_regions list. If there are pending I/Os, the region is not added to any list. It is added to the quiesced_regions list later (by dm_rh_dec function) when all I/Os finish. - when the region is on quiesced_regions list, there are no I/Os in flight on this region. The region is popped from the list in dm_rh_recovery_start function. Then, a kcopyd job is started in the recover function. - when the kcopyd job finishes, recovery_complete is called. It calls dm_rh_recovery_end. dm_rh_recovery_end adds the region to recovered_regions or failed_recovered_regions list (depending on whether the copy operation was successful or not). The above mechanism assumes that if the region is in DM_RH_RECOVERING state, no new I/Os are started on this region. When I/O is started, dm_rh_inc_pending is called, which increases reg->pending count. When I/O is finished, dm_rh_dec is called. It decreases reg->pending count. If the count is zero and the region was in DM_RH_RECOVERING state, dm_rh_dec adds it to the quiesced_regions list. Consequently, if we call dm_rh_inc_pending/dm_rh_dec while the region is in DM_RH_RECOVERING state, it could be added to quiesced_regions list multiple times or it could be added to this list when kcopyd is copying data (it is assumed that the region is not on any list while kcopyd does its jobs). This results in memory corruption and crash. There already exist bypasses for REQ_FLUSH requests: REQ_FLUSH requests do not belong to any region, so they are always added to the sync list in do_writes. dm_rh_inc_pending does not increase count for REQ_FLUSH requests. In mirror_end_io, dm_rh_dec is never called for REQ_FLUSH requests. These bypasses avoid the crash possibility described above. These bypasses were improperly implemented for REQ_DISCARD when the mirror target gained discard support in commit 5fc2ffeabb9ee0fc0e71ff16b49f34f0ed3d05b4 (dm raid1: support discard). In do_writes, REQ_DISCARD requests is always added to the sync queue and immediately dispatched (even if the region is in DM_RH_RECOVERING). However, dm_rh_inc and dm_rh_dec is called for REQ_DISCARD resusts. So it violates the rule that no I/Os are started on DM_RH_RECOVERING regions, and causes the list corruption described above. This patch changes it so that REQ_DISCARD requests follow the same path as REQ_FLUSH. This avoids the crash. Reference: https://bugzilla.redhat.com/837607 Signed-off-by: Mikulas Patocka Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-region-hash.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d039de8322f0..ea16984c0f08 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1214,7 +1214,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, * We need to dec pending if this was a write. */ if (rw == WRITE) { - if (!(bio->bi_rw & REQ_FLUSH)) + if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) dm_rh_dec(ms->rh, map_context->ll); return error; } diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 7771ed212182..69732e03eb34 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) return; } + if (bio->bi_rw & REQ_DISCARD) + return; + /* We must inform the log that the sync count has changed. */ log->type->set_region_sync(log, region, 0); @@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) struct bio *bio; for (bio = bios->head; bio; bio = bio->bi_next) { - if (bio->bi_rw & REQ_FLUSH) + if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)) continue; rh_inc(rh, dm_rh_bio_to_region(rh, bio)); } -- cgit v1.2.3 From 650d2a06b4fe1cc1d218c20e256650f68bf0ca31 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 20 Jul 2012 14:25:05 +0100 Subject: dm thin: do not send discards to shared blocks When process_discard receives a partial discard that doesn't cover a full block, it sends this discard down to that block. Unfortunately, the block can be shared and the discard would corrupt the other snapshots sharing this block. This patch detects block sharing and ends the discard with success when sending it to the shared block. The above change means that if the device supports discard it can't be guaranteed that a discard request zeroes data. Therefore, we set ti->discard_zeroes_data_unsupported. Thin target discard support with this bug arrived in commit 104655fd4dcebd50068ef30253a001da72e3a081 (dm thin: support discards). Signed-off-by: Mikulas Patocka Cc: stable@kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ce59824fb414..68694da0d21d 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1245,7 +1245,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio) cell_release_singleton(cell, bio); cell_release_singleton(cell2, bio); - remap_and_issue(tc, bio, lookup_result.block); + if ((!lookup_result.shared) && pool->pf.discard_passdown) + remap_and_issue(tc, bio, lookup_result.block); + else + bio_endio(bio, 0); } break; @@ -2628,6 +2631,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (tc->pool->pf.discard_enabled) { ti->discards_supported = 1; ti->num_discard_requests = 1; + ti->discard_zeroes_data_unsupported = 1; } dm_put(pool_md); -- cgit v1.2.3 From 7c8d3a42fe1c58a7e8fd3f6a013e7d7b474ff931 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 20 Jul 2012 14:25:07 +0100 Subject: dm raid1: set discard_zeroes_data_unsupported We can't guarantee that REQ_DISCARD on dm-mirror zeroes the data even if the underlying disks support zero on discard. So this patch sets ti->discard_zeroes_data_unsupported. For example, if the mirror is in the process of resynchronizing, it may happen that kcopyd reads a piece of data, then discard is sent on the same area and then kcopyd writes the piece of data to another leg. Consequently, the data is not zeroed. The flag was made available by commit 983c7db347db8ce2d8453fd1d89b7a4bb6920d56 (dm crypt: always disable discard_zeroes_data). Signed-off-by: Mikulas Patocka Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index ea16984c0f08..b58b7a33914a 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1084,6 +1084,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->split_io = dm_rh_get_region_size(ms->rh); ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->discard_zeroes_data_unsupported = 1; ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); -- cgit v1.2.3 From bc792e612e78a24ae0b30cc5b85f2368379ba4d4 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 20 Jul 2012 17:27:37 -0700 Subject: kdb: Revive dmesg command The kgdb dmesg command is broken after the printk rework. The old logic in kdb code makes no sense in terms of current printk/logging storage format, and KDB simply hangs forever. This patch revives the command by switching to kmsg_dumper iterator. The code is now much more simpler and shorter. Signed-off-by: Anton Vorontsov Signed-off-by: Linus Torvalds --- kernel/debug/kdb/kdb_main.c | 91 ++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 58 deletions(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 67b847dfa2bb..df17c935d3c6 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -2040,8 +2041,15 @@ static int kdb_env(int argc, const char **argv) */ static int kdb_dmesg(int argc, const char **argv) { - char *syslog_data[4], *start, *end, c = '\0', *p; - int diag, logging, logsize, lines = 0, adjust = 0, n; + int diag; + int logging; + int lines = 0; + int adjust = 0; + int n = 0; + int skip = 0; + struct kmsg_dumper dumper = { .active = 1 }; + size_t len; + char buf[201]; if (argc > 2) return KDB_ARGCOUNT; @@ -2064,22 +2072,10 @@ static int kdb_dmesg(int argc, const char **argv) kdb_set(2, setargs); } - /* syslog_data[0,1] physical start, end+1. syslog_data[2,3] - * logical start, end+1. */ - kdb_syslog_data(syslog_data); - if (syslog_data[2] == syslog_data[3]) - return 0; - logsize = syslog_data[1] - syslog_data[0]; - start = syslog_data[2]; - end = syslog_data[3]; -#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0]) - for (n = 0, p = start; p < end; ++p) { - c = *KDB_WRAP(p); - if (c == '\n') - ++n; - } - if (c != '\n') - ++n; + kmsg_dump_rewind(&dumper); + while (kmsg_dump_get_line(&dumper, 1, NULL, 0, NULL)) + n++; + if (lines < 0) { if (adjust >= n) kdb_printf("buffer only contains %d lines, nothing " @@ -2087,21 +2083,11 @@ static int kdb_dmesg(int argc, const char **argv) else if (adjust - lines >= n) kdb_printf("buffer only contains %d lines, last %d " "lines printed\n", n, n - adjust); - if (adjust) { - for (; start < end && adjust; ++start) { - if (*KDB_WRAP(start) == '\n') - --adjust; - } - if (start < end) - ++start; - } - for (p = start; p < end && lines; ++p) { - if (*KDB_WRAP(p) == '\n') - ++lines; - } - end = p; + skip = adjust; + lines = abs(lines); } else if (lines > 0) { - int skip = n - (adjust + lines); + skip = n - lines - adjust; + lines = abs(lines); if (adjust >= n) { kdb_printf("buffer only contains %d lines, " "nothing printed\n", n); @@ -2112,35 +2098,24 @@ static int kdb_dmesg(int argc, const char **argv) kdb_printf("buffer only contains %d lines, first " "%d lines printed\n", n, lines); } - for (; start < end && skip; ++start) { - if (*KDB_WRAP(start) == '\n') - --skip; - } - for (p = start; p < end && lines; ++p) { - if (*KDB_WRAP(p) == '\n') - --lines; - } - end = p; + } else { + lines = n; } - /* Do a line at a time (max 200 chars) to reduce protocol overhead */ - c = '\n'; - while (start != end) { - char buf[201]; - p = buf; - if (KDB_FLAG(CMD_INTERRUPT)) - return 0; - while (start < end && (c = *KDB_WRAP(start)) && - (p - buf) < sizeof(buf)-1) { - ++start; - *p++ = c; - if (c == '\n') - break; + + if (skip >= n || skip < 0) + return 0; + + kmsg_dump_rewind(&dumper); + while (kmsg_dump_get_line(&dumper, 1, buf, sizeof(buf), &len)) { + if (skip) { + skip--; + continue; } - *p = '\0'; - kdb_printf("%s", buf); + if (!lines--) + break; + + kdb_printf("%.*s\n", (int)len - 1, buf); } - if (c != '\n') - kdb_printf("\n"); return 0; } -- cgit v1.2.3 From 1b499d05eecbe04969516717a8e15afb6ad80689 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 20 Jul 2012 17:27:54 -0700 Subject: printk: Remove kdb_syslog_data The function is no longer needed, so remove it. Signed-off-by: Anton Vorontsov Signed-off-by: Linus Torvalds --- kernel/debug/kdb/kdb_private.h | 1 - kernel/printk.c | 15 --------------- 2 files changed, 16 deletions(-) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 47c4e56e513b..392ec6a25844 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -205,7 +205,6 @@ extern char kdb_grep_string[]; extern int kdb_grep_leading; extern int kdb_grep_trailing; extern char *kdb_cmds[]; -extern void kdb_syslog_data(char *syslog_data[]); extern unsigned long kdb_task_state_string(const char *); extern char kdb_task_state_char (const struct task_struct *); extern unsigned long kdb_task_state(const struct task_struct *p, diff --git a/kernel/printk.c b/kernel/printk.c index 177fa49357a5..c8129678dfbf 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1192,21 +1192,6 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) return do_syslog(type, buf, len, SYSLOG_FROM_CALL); } -#ifdef CONFIG_KGDB_KDB -/* kdb dmesg command needs access to the syslog buffer. do_syslog() - * uses locks so it cannot be used during debugging. Just tell kdb - * where the start and end of the physical and logical logs are. This - * is equivalent to do_syslog(3). - */ -void kdb_syslog_data(char *syslog_data[4]) -{ - syslog_data[0] = log_buf; - syslog_data[1] = log_buf + log_buf_len; - syslog_data[2] = log_buf + log_first_idx; - syslog_data[3] = log_buf + log_next_idx; -} -#endif /* CONFIG_KGDB_KDB */ - static bool __read_mostly ignore_loglevel; static int __init ignore_loglevel_setup(char *str) -- cgit v1.2.3 From 533827c921c34310f63e859e1d6d0feec439657d Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 20 Jul 2012 17:28:07 -0700 Subject: printk: Implement some unlocked kmsg_dump functions If used from KDB, the locked variants are prone to deadlocks (suppose we got to the debugger w/ the logbuf lock held). So, we have to implement a few routines that grab no logbuf lock. Yet we don't need these functions in modules, so we don't export them. Signed-off-by: Anton Vorontsov Signed-off-by: Linus Torvalds --- include/linux/kmsg_dump.h | 16 +++++++++++ kernel/printk.c | 68 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 71 insertions(+), 13 deletions(-) diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index d6bd50110ec2..2e7a1e032c71 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -55,12 +55,17 @@ struct kmsg_dumper { #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); +bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len); + bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len); bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, char *buf, size_t size, size_t *len); +void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); + void kmsg_dump_rewind(struct kmsg_dumper *dumper); int kmsg_dump_register(struct kmsg_dumper *dumper); @@ -71,6 +76,13 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } +static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, + bool syslog, const char *line, + size_t size, size_t *len) +{ + return false; +} + static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, const char *line, size_t size, size_t *len) { @@ -83,6 +95,10 @@ static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, return false; } +static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +{ +} + static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) { } diff --git a/kernel/printk.c b/kernel/printk.c index c8129678dfbf..ac4bc9e79465 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -2510,7 +2510,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) } /** - * kmsg_dump_get_line - retrieve one kmsg log line + * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) * @dumper: registered kmsg dumper * @syslog: include the "<4>" prefixes * @line: buffer to copy the line to @@ -2525,11 +2525,12 @@ void kmsg_dump(enum kmsg_dump_reason reason) * * A return value of FALSE indicates that there are no more records to * read. + * + * The function is similar to kmsg_dump_get_line(), but grabs no locks. */ -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, - char *line, size_t size, size_t *len) +bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len) { - unsigned long flags; struct log *msg; size_t l = 0; bool ret = false; @@ -2537,7 +2538,6 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, if (!dumper->active) goto out; - raw_spin_lock_irqsave(&logbuf_lock, flags); if (dumper->cur_seq < log_first_seq) { /* messages are gone, move to first available one */ dumper->cur_seq = log_first_seq; @@ -2545,10 +2545,8 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, } /* last entry */ - if (dumper->cur_seq >= log_next_seq) { - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + if (dumper->cur_seq >= log_next_seq) goto out; - } msg = log_from_idx(dumper->cur_idx); l = msg_print_text(msg, 0, syslog, line, size); @@ -2556,12 +2554,41 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, dumper->cur_idx = log_next(dumper->cur_idx); dumper->cur_seq++; ret = true; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); out: if (len) *len = l; return ret; } + +/** + * kmsg_dump_get_line - retrieve one kmsg log line + * @dumper: registered kmsg dumper + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to + * @size: maximum size of the buffer + * @len: length of line placed into buffer + * + * Start at the beginning of the kmsg buffer, with the oldest kmsg + * record, and copy one record into the provided buffer. + * + * Consecutive calls will return the next available record moving + * towards the end of the buffer with the youngest messages. + * + * A return value of FALSE indicates that there are no more records to + * read. + */ +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len) +{ + unsigned long flags; + bool ret; + + raw_spin_lock_irqsave(&logbuf_lock, flags); + ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); + raw_spin_unlock_irqrestore(&logbuf_lock, flags); + + return ret; +} EXPORT_SYMBOL_GPL(kmsg_dump_get_line); /** @@ -2663,6 +2690,24 @@ out: } EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); +/** + * kmsg_dump_rewind_nolock - reset the interator (unlocked version) + * @dumper: registered kmsg dumper + * + * Reset the dumper's iterator so that kmsg_dump_get_line() and + * kmsg_dump_get_buffer() can be called again and used multiple + * times within the same dumper.dump() callback. + * + * The function is similar to kmsg_dump_rewind(), but grabs no locks. + */ +void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +{ + dumper->cur_seq = clear_seq; + dumper->cur_idx = clear_idx; + dumper->next_seq = log_next_seq; + dumper->next_idx = log_next_idx; +} + /** * kmsg_dump_rewind - reset the interator * @dumper: registered kmsg dumper @@ -2676,10 +2721,7 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) unsigned long flags; raw_spin_lock_irqsave(&logbuf_lock, flags); - dumper->cur_seq = clear_seq; - dumper->cur_idx = clear_idx; - dumper->next_seq = log_next_seq; - dumper->next_idx = log_next_idx; + kmsg_dump_rewind_nolock(dumper); raw_spin_unlock_irqrestore(&logbuf_lock, flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); -- cgit v1.2.3 From c064da47144b11be4697a4611f640086a663016a Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 20 Jul 2012 17:28:25 -0700 Subject: kdb: Switch to nolock variants of kmsg_dump functions The locked variants are prone to deadlocks (suppose we got to the debugger w/ the logbuf lock held), so let's switch to nolock variants. Signed-off-by: Anton Vorontsov Signed-off-by: Linus Torvalds --- kernel/debug/kdb/kdb_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index df17c935d3c6..1f91413edb87 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2072,8 +2072,8 @@ static int kdb_dmesg(int argc, const char **argv) kdb_set(2, setargs); } - kmsg_dump_rewind(&dumper); - while (kmsg_dump_get_line(&dumper, 1, NULL, 0, NULL)) + kmsg_dump_rewind_nolock(&dumper); + while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL)) n++; if (lines < 0) { @@ -2105,8 +2105,8 @@ static int kdb_dmesg(int argc, const char **argv) if (skip >= n || skip < 0) return 0; - kmsg_dump_rewind(&dumper); - while (kmsg_dump_get_line(&dumper, 1, buf, sizeof(buf), &len)) { + kmsg_dump_rewind_nolock(&dumper); + while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) { if (skip) { skip--; continue; -- cgit v1.2.3 From bff9d1865640dcb4d9711dcc50714e9a8b859453 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 21 Jul 2012 20:24:52 +0200 Subject: Remove SYSTEM_SUSPEND_DISK system state The SYSTEM_SUSPEND_DISK system state is never used, so drop it. Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e07f5e0c5df4..604382143bcf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -377,7 +377,6 @@ extern enum system_states { SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, - SYSTEM_SUSPEND_DISK, } system_state; #define TAINT_PROPRIETARY_MODULE 0 -- cgit v1.2.3 From 28a33cbc24e4256c143dce96c7d93bf423229f92 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 21 Jul 2012 13:58:29 -0700 Subject: Linux 3.5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index aa8e315f26ef..4bb09e1b1230 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 5 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Saber-toothed Squirrel # *DOCUMENTATION* -- cgit v1.2.3 From 36d93d88a5396baa135f8bcde7b8501dfe3b8e53 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 22 Jun 2012 16:25:19 +0200 Subject: Revert "x86/early_printk: Replace obsolete simple_strtoul() usage with kstrtoint()" This reverts commit fbd24153c48b8425b09c161a020483cd77da870e. This commit is subtly buggy: kstrto*int() can return an error but it's not checked in every path. simple_strtoul() on the other hand could not fail, so this patch subtly intruduces new failure modes. Signed-off-by: Shuah Khan Link: http://lkml.kernel.org/r/1338424803.3569.5.camel@lorien2 Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 5e4771266f1a..9b9f18b49918 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -119,7 +119,7 @@ static __init void early_serial_init(char *s) unsigned char c; unsigned divisor; unsigned baud = DEFAULT_BAUD; - ssize_t ret; + char *e; if (*s == ',') ++s; @@ -127,14 +127,14 @@ static __init void early_serial_init(char *s) if (*s) { unsigned port; if (!strncmp(s, "0x", 2)) { - ret = kstrtoint(s, 16, &early_serial_base); + early_serial_base = simple_strtoul(s, &e, 16); } else { static const int __initconst bases[] = { 0x3f8, 0x2f8 }; if (!strncmp(s, "ttyS", 4)) s += 4; - ret = kstrtouint(s, 10, &port); - if (ret || port > 1) + port = simple_strtoul(s, &e, 10); + if (port > 1 || s == e) port = 0; early_serial_base = bases[port]; } @@ -149,8 +149,8 @@ static __init void early_serial_init(char *s) outb(0x3, early_serial_base + MCR); /* DTR + RTS */ if (*s) { - ret = kstrtouint(s, 0, &baud); - if (ret || baud == 0) + baud = simple_strtoul(s, &e, 0); + if (baud == 0 || s == e) baud = DEFAULT_BAUD; } -- cgit v1.2.3