From 364829b1263b44aa60383824e4c1289d83d78ca7 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 24 Nov 2010 15:13:16 -0800 Subject: tracing: Fix panic when lseek() called on "trace" opened for writing The file_ops struct for the "trace" special file defined llseek as seq_lseek(). However, if the file was opened for writing only, seq_open() was not called, and the seek would dereference a null pointer, file->private_data. This patch introduces a new wrapper for seq_lseek() which checks if the file descriptor is opened for reading first. If not, it does nothing. Cc: Signed-off-by: Slava Pestov LKML-Reference: <1290640396-24179-1-git-send-email-slavapestov@google.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee6a7339cf0e..21db0deb5c7d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2339,11 +2339,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf, return count; } +static loff_t tracing_seek(struct file *file, loff_t offset, int origin) +{ + if (file->f_mode & FMODE_READ) + return seq_lseek(file, offset, origin); + else + return 0; +} + static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, .write = tracing_write_stub, - .llseek = seq_lseek, + .llseek = tracing_seek, .release = tracing_release, }; -- cgit v1.2.3 From 15664125f7cadcb6d725cb2d9b90f9715397848d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Nov 2010 10:43:55 +0100 Subject: scripts/tags.sh: Add magic for trace-events Make tags find the trace-event definitions Acked-by: WANG Cong Signed-off-by: Peter Zijlstra LKML-Reference: <1290591835.2072.438.camel@laptop> Signed-off-by: Steven Rostedt --- scripts/tags.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index 8509bb512935..bbbe584d4494 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -125,7 +125,9 @@ exuberant() -I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \ --extra=+f --c-kinds=-px \ --regex-asm='/^ENTRY\(([^)]*)\).*/\1/' \ - --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' + --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \ + --regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/' \ + --regex-c++='/^DEFINE_EVENT\(([^,)]*).*/trace_\1/' all_kconfigs | xargs $1 -a \ --langdef=kconfig --language-force=kconfig \ -- cgit v1.2.3 From e63233f75a1a6bfa97ffb52a20cc6801a4c63fb2 Mon Sep 17 00:00:00 2001 From: John Reiser Date: Mon, 22 Nov 2010 19:41:44 -0800 Subject: ftrace: Have recordmcount honor endianness in fn_ELF_R_INFO It looks to me like the change which introduced "virtual functions" forgot about cross-platform endianness. Thank you to Arnaud for supplying before+after data files do_mounts*.o. This fixes a MIPS build failure triggered by recordmcount. Reported-by: Arnaud Lacombe Tested-by: Arnaud Lacombe Acked-by: Wu Zhangjin Acked-by: Ralf Baechle Signed-off-by: John Reiser Signed-off-by: Steven Rostedt --- scripts/recordmcount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 58e933a20544..39667174971d 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -119,7 +119,7 @@ static uint_t (*Elf_r_sym)(Elf_Rel const *rp) = fn_ELF_R_SYM; static void fn_ELF_R_INFO(Elf_Rel *const rp, unsigned sym, unsigned type) { - rp->r_info = ELF_R_INFO(sym, type); + rp->r_info = _w(ELF_R_INFO(sym, type)); } static void (*Elf_r_info)(Elf_Rel *const rp, unsigned sym, unsigned type) = fn_ELF_R_INFO; -- cgit v1.2.3 From 5167695753c63444a9e6cbbef136200a16c7a225 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Dec 2010 14:18:20 +0100 Subject: perf: Fix duplicate events with multiple-pmu vs software events Because the multi-pmu bits can share contexts between struct pmu instances we could get duplicate events by iterating the pmu list. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + kernel/perf_event.c | 35 +++++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index de2c41758e29..4f1279e105ee 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -887,6 +887,7 @@ struct perf_cpu_context { int exclusive; struct list_head rotation_list; int jiffies_interval; + struct pmu *active_pmu; }; struct perf_output_handle { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index eac7e3364335..7b870174c56d 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3824,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event) rcu_read_lock(); list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); + if (cpuctx->active_pmu != pmu) + goto next; perf_event_task_ctx(&cpuctx->ctx, task_event); ctx = task_event->task_ctx; @@ -3959,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) rcu_read_lock(); list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); + if (cpuctx->active_pmu != pmu) + goto next; perf_event_comm_ctx(&cpuctx->ctx, comm_event); ctxn = pmu->task_ctx_nr; @@ -4144,6 +4148,8 @@ got_name: rcu_read_lock(); list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); + if (cpuctx->active_pmu != pmu) + goto next; perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); @@ -5145,20 +5151,36 @@ static void *find_pmu_context(int ctxn) return NULL; } -static void free_pmu_context(void * __percpu cpu_context) +static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu) { - struct pmu *pmu; + int cpu; + + for_each_possible_cpu(cpu) { + struct perf_cpu_context *cpuctx; + + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + + if (cpuctx->active_pmu == old_pmu) + cpuctx->active_pmu = pmu; + } +} + +static void free_pmu_context(struct pmu *pmu) +{ + struct pmu *i; mutex_lock(&pmus_lock); /* * Like a real lame refcount. */ - list_for_each_entry(pmu, &pmus, entry) { - if (pmu->pmu_cpu_context == cpu_context) + list_for_each_entry(i, &pmus, entry) { + if (i->pmu_cpu_context == pmu->pmu_cpu_context) { + update_pmu_context(i, pmu); goto out; + } } - free_percpu(cpu_context); + free_percpu(pmu->pmu_cpu_context); out: mutex_unlock(&pmus_lock); } @@ -5190,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu) cpuctx->ctx.pmu = pmu; cpuctx->jiffies_interval = 1; INIT_LIST_HEAD(&cpuctx->rotation_list); + cpuctx->active_pmu = pmu; } got_cpu_context: @@ -5241,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_rcu(); free_percpu(pmu->pmu_disable_count); - free_pmu_context(pmu->pmu_cpu_context); + free_pmu_context(pmu); } struct pmu *perf_init_event(struct perf_event *event) -- cgit v1.2.3 From 4720dd1b3858f0da2593188cb1e57eb0d3bc4af2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Dec 2010 17:43:21 +0100 Subject: x86: io_apic: Avoid unused variable warning when CONFIG_GENERIC_PENDING_IRQ=n arch/x86/kernel/apic/io_apic.c: In function 'ack_apic_level': arch/x86/kernel/apic/io_apic.c:2433: warning: unused variable 'desc' Signed-off-by: Andrew Morton LKML-Reference: <201010272107.o9RL7rse018212@imap1.linux-foundation.org> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7cc0a721f628..226060eec341 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2430,13 +2430,12 @@ static void ack_apic_level(struct irq_data *data) { struct irq_cfg *cfg = data->chip_data; int i, do_unmask_irq = 0, irq = data->irq; - struct irq_desc *desc = irq_to_desc(irq); unsigned long v; irq_complete_move(cfg); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(desc->status & IRQ_MOVE_PENDING)) { + if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; mask_ioapic(cfg); } -- cgit v1.2.3 From f1c18071ad70e2a78ab31fc26a18fcfa954a05c6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 13 Dec 2010 12:43:23 +0100 Subject: x86: HPET: Chose a paranoid safe value for the ETIME check commit 995bd3bb5 (x86: Hpet: Avoid the comparator readback penalty) chose 8 HPET cycles as a safe value for the ETIME check, as we had the confirmation that the posted write to the comparator register is delayed by two HPET clock cycles on Intel chipsets which showed readback problems. After that patch hit mainline we got reports from machines with newer AMD chipsets which seem to have an even longer delay. See http://thread.gmane.org/gmane.linux.kernel/1054283 and http://thread.gmane.org/gmane.linux.kernel/1069458 for further information. Boris tried to come up with an ACPI based selection of the minimum HPET cycles, but this failed on a couple of test machines. And of course we did not get any useful information from the hardware folks. For now our only option is to chose a paranoid high and safe value for the minimum HPET cycles used by the ETIME check. Adjust the minimum ns value for the HPET clockevent accordingly. Reported-Bistected-and-Tested-by: Markus Trippelsdorf Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Simon Kirby Cc: Borislav Petkov Cc: Andreas Herrmann Cc: John Stultz --- arch/x86/kernel/hpet.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ae03cab4352e..4ff5968f12d2 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -27,6 +27,9 @@ #define HPET_DEV_FSB_CAP 0x1000 #define HPET_DEV_PERI_CAP 0x2000 +#define HPET_MIN_CYCLES 128 +#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) + #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) /* @@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_register(void) /* Calculate the min / max delta */ hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &hpet_clockevent); - /* 5 usec minimum reprogramming delta. */ - hpet_clockevent.min_delta_ns = 5000; + /* Setup minimum reprogramming delta. */ + hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, + &hpet_clockevent); /* * Start hpet with the boot cpu mask and make it @@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long delta, * the wraparound into account) nor a simple count down event * mode. Further the write to the comparator register is * delayed internally up to two HPET clock cycles in certain - * chipsets (ATI, ICH9,10). We worked around that by reading - * back the compare register, but that required another - * workaround for ICH9,10 chips where the first readout after - * write can return the old stale value. We already have a - * minimum delta of 5us enforced, but a NMI or SMI hitting + * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even + * longer delays. We worked around that by reading back the + * compare register, but that required another workaround for + * ICH9,10 chips where the first readout after write can + * return the old stale value. We already had a minimum + * programming delta of 5us enforced, but a NMI or SMI hitting * between the counter readout and the comparator write can * move us behind that point easily. Now instead of reading * the compare register back several times, we make the ETIME * decision based on the following: Return ETIME if the - * counter value after the write is less than 8 HPET cycles + * counter value after the write is less than HPET_MIN_CYCLES * away from the event or if the counter is already ahead of - * the event. + * the event. The minimum programming delta for the generic + * clockevents code is set to 1.5 * HPET_MIN_CYCLES. */ res = (s32)(cnt - hpet_readl(HPET_COUNTER)); - return res < 8 ? -ETIME : 0; + return res < HPET_MIN_CYCLES ? -ETIME : 0; } static void hpet_legacy_set_mode(enum clock_event_mode mode, -- cgit v1.2.3 From de2a8cf98ecdde25231d6c5e7901e2cffaf32af9 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 13 Dec 2010 16:01:38 -0800 Subject: x86, gcc-4.6: Use gcc -m options when building vdso The vdso Makefile passes linker-style -m options not to the linker but to gcc. This happens to work with earlier gcc, but fails with gcc 4.6. Pass gcc-style -m options, instead. Note: all currently supported versions of gcc supports -m32, so there is no reason to conditionalize it any more. Reported-by: H. J. Lu Signed-off-by: H. Peter Anvin LKML-Reference: Cc: --- arch/x86/vdso/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 4a2afa1bac51..b6552b189bcd 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -25,7 +25,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) export CPPFLAGS_vdso.lds += -P -C -VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \ +VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so @@ -69,7 +69,7 @@ vdso32.so-$(VDSO32-y) += sysenter vdso32-images = $(vdso32.so-y:%=vdso32-%.so) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1 +VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1 # This makes sure the $(obj) subdirectory exists even though vdso32/ # is not a kbuild sub-make subdirectory. -- cgit v1.2.3 From 53dde5f385bc56e312f78b7cb25ffaf8efd4735d Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 16 Nov 2010 13:23:50 -0800 Subject: bootmem: Add alloc_bootmem_align() Add an alloc_bootmem_align() interface to allocate bootmem with specified alignment. This is necessary to be able to allocate the xsave area in a subsequent patch. Signed-off-by: Suresh Siddha LKML-Reference: <20101116212441.977574826@sbsiddha-MOBL3.sc.intel.com> Acked-by: H. Peter Anvin Signed-off-by: H. Peter Anvin Cc: --- include/linux/bootmem.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 266ab9291232..499dfe982a0e 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -105,6 +105,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, #define alloc_bootmem(x) \ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_align(x, align) \ + __alloc_bootmem(x, align, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages(x) \ -- cgit v1.2.3 From 10340ae130fb70352eae1ae8a00b7906d91bf166 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 16 Nov 2010 13:23:51 -0800 Subject: x86, xsave: Use alloc_bootmem_align() instead of alloc_bootmem() Alignment of alloc_bootmem() depends on the value of L1_CACHE_SHIFT. What we need here, however, is 64 byte alignment. Use alloc_bootmem_align() and explicitly specify the alignment instead. This fixes a kernel boot crash reported by Jody when the cpu in .config is set to MPENTIUMII but the kernel is booted on a xsave-capable CPU. Reported-by: Jody Bruchon Signed-off-by: Suresh Siddha LKML-Reference: <20101116212442.059967454@sbsiddha-MOBL3.sc.intel.com> Signed-off-by: H. Peter Anvin Cc: --- arch/x86/kernel/xsave.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 9c253bd65e24..547128546cc3 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -394,7 +394,8 @@ static void __init setup_xstate_init(void) * Setup init_xstate_buf to represent the init state of * all the features managed by the xsave */ - init_xstate_buf = alloc_bootmem(xstate_size); + init_xstate_buf = alloc_bootmem_align(xstate_size, + __alignof__(struct xsave_struct)); init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; clts(); -- cgit v1.2.3 From 254e42006c893f45bca48f313536fcba12206418 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 6 Dec 2010 12:26:30 -0800 Subject: x86, vt-d: Quirk for masking vtd spec errors to platform error handling logic On platforms with Intel 7500 chipset, there were some reports of system hang/NMI's during kexec/kdump in the presence of interrupt-remapping enabled. During kdump, there is a window where the devices might be still using old kernel's interrupt information, while the kdump kernel is coming up. This can cause vt-d faults as the interrupt configuration from the old kernel map to null IRTE entries in the new kernel etc. (with out interrupt-remapping enabled, we still have the same issue but in this case we will see benign spurious interrupt hit the new kernel). Based on platform config settings, these platforms seem to generate NMI/SMI when a vt-d fault happens and there were reports that the resulting SMI causes the system to hang. Fix it by masking vt-d spec defined errors to platform error reporting logic. VT-d spec related errors are already handled by the VT-d OS code, so need to report the same error through other channels. Signed-off-by: Suresh Siddha LKML-Reference: <1291667190.2675.8.camel@sbsiddha-MOBL3.sc.intel.com> Cc: stable@kernel.org [v2.6.32+] Reported-by: Max Asbock Reported-and-tested-by: Takao Indoh Acked-by: Chris Wright Acked-by: Kenji Kaneshige Signed-off-by: H. Peter Anvin --- drivers/pci/quirks.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6f9350cabbd5..36191edd6d51 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2764,6 +2764,29 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_m DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); #endif /*CONFIG_MMC_RICOH_MMC*/ +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) +#define VTUNCERRMSK_REG 0x1ac +#define VTD_MSK_SPEC_ERRORS (1 << 31) +/* + * This is a quirk for masking vt-d spec defined errors to platform error + * handling logic. With out this, platforms using Intel 7500, 5500 chipsets + * (and the derivative chipsets like X58 etc) seem to generate NMI/SMI (based + * on the RAS config settings of the platform) when a vt-d fault happens. + * The resulting SMI caused the system to hang. + * + * VT-d spec related errors are already handled by the VT-d OS code, so no + * need to report the same error through other channels. + */ +static void vtd_mask_spec_errors(struct pci_dev *dev) +{ + u32 word; + + pci_read_config_dword(dev, VTUNCERRMSK_REG, &word); + pci_write_config_dword(dev, VTUNCERRMSK_REG, word | VTD_MSK_SPEC_ERRORS); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, vtd_mask_spec_errors); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors); +#endif static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) -- cgit v1.2.3 From 086e8ced65d9bcc4a8e8f1cd39b09640f2883f90 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 1 Dec 2010 09:40:32 -0800 Subject: x86, vt-d: Fix the vt-d fault handling irq migration in the x2apic mode In x2apic mode, we need to set the upper address register of the fault handling interrupt register of the vt-d hardware. Without this irq migration of the vt-d fault handling interrupt is broken. Signed-off-by: Kenji Kaneshige LKML-Reference: <1291225233.2648.39.camel@sbsiddha-MOBL3> Signed-off-by: Suresh Siddha Cc: stable@kernel.org [v2.6.32+] Acked-by: Chris Wright Tested-by: Takao Indoh Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 226060eec341..fadcd743a74f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3412,6 +3412,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); + msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); dmar_msi_write(irq, &msg); -- cgit v1.2.3 From 7f7fbf45c6b748074546f7f16b9488ca71de99c1 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 30 Nov 2010 22:22:28 -0800 Subject: x86: Enable the intr-remap fault handling after local APIC setup Interrupt-remapping gets enabled very early in the boot, as it determines the apic mode that the processor can use. And the current code enables the vt-d fault handling before the setup_local_APIC(). And hence the APIC LDR registers and data structure in the memory may not be initialized. So the vt-d fault handling in logical xapic/x2apic modes were broken. Fix this by enabling the vt-d fault handling in the end_local_APIC_setup() A cleaner fix of enabling fault handling while enabling intr-remapping will be addressed for v2.6.38. [ Enabling intr-remapping determines the usage of x2apic mode and the apic mode determines the fault-handling configuration. ] Signed-off-by: Kenji Kaneshige LKML-Reference: <20101201062244.541996375@intel.com> Signed-off-by: Suresh Siddha Cc: stable@kernel.org [v2.6.32+] Acked-by: Chris Wright Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/apic.c | 8 ++++++++ arch/x86/kernel/apic/probe_64.c | 7 ------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3f838d537392..78218135b48e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1389,6 +1389,14 @@ void __cpuinit end_local_APIC_setup(void) setup_apic_nmi_watchdog(NULL); apic_pm_activate(); + + /* + * Now that local APIC setup is completed for BP, configure the fault + * handling for interrupt remapping. + */ + if (!smp_processor_id() && intr_remapping_enabled) + enable_drhd_fault_handling(); + } #ifdef CONFIG_X86_X2APIC diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index f9e4e6a54073..d8c4a6feb286 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -79,13 +79,6 @@ void __init default_setup_apic_routing(void) /* need to update phys_pkg_id */ apic->phys_pkg_id = apicid_phys_pkg_id; } - - /* - * Now that apic routing model is selected, configure the - * fault handling for intr remapping. - */ - if (intr_remapping_enabled) - enable_drhd_fault_handling(); } /* Same for both flat and physical. */ -- cgit v1.2.3 From 7f99d946e71e71d484b7543b49e990508e70d0c0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 30 Nov 2010 22:22:29 -0800 Subject: x86, vt-d: Handle previous faults after enabling fault handling Fault handling is getting enabled after enabling the interrupt-remapping (as the success of interrupt-remapping can affect the apic mode and hence the fault handling mode). Hence there can potentially be some faults between the window of enabling interrupt-remapping in the vt-d and the fault-handling of the vt-d units. Handle any previous faults after enabling the vt-d fault handling. For v2.6.38 cleanup, need to check if we can remove the dmar_fault() in the enable_intr_remapping() and see if we can enable fault handling along with enabling intr-remapping. Signed-off-by: Suresh Siddha LKML-Reference: <20101201062244.630417138@intel.com> Cc: stable@kernel.org [v2.6.32+] Acked-by: Chris Wright Signed-off-by: H. Peter Anvin --- drivers/pci/dmar.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 0157708d474d..09933eb9126b 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -1417,6 +1417,11 @@ int __init enable_drhd_fault_handling(void) (unsigned long long)drhd->reg_base_addr, ret); return -1; } + + /* + * Clear any previous faults. + */ + dmar_fault(iommu->irq, iommu); } return 0; -- cgit v1.2.3 From ce677831a4abd0f9f957c90ac6f6a0d0472bafb4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 24 Oct 2010 21:50:42 +0200 Subject: perf: Fix off by one in perf_swevent_init() The perf_swevent_enabled[] array has PERF_COUNT_SW_MAX elements. Signed-off-by: Dan Carpenter Signed-off-by: Peter Zijlstra LKML-Reference: <20101024195041.GT5985@bicker> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 7b870174c56d..2870feee81dd 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4719,7 +4719,7 @@ static int perf_swevent_init(struct perf_event *event) break; } - if (event_id > PERF_COUNT_SW_MAX) + if (event_id >= PERF_COUNT_SW_MAX) return -ENOENT; if (!event->parent) { -- cgit v1.2.3 From 147dd5610c8d1bacb88a6c1dfdaceaf257946ed0 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 16 Dec 2010 19:11:09 -0800 Subject: x86-32: Make sure we can map all of lowmem if we need to A relocatable kernel can be anywhere in lowmem -- and in the case of a kdump kernel, is likely to be fairly high. Since the early page tables map everything from address zero up we need to make sure we allocate enough brk that we can map all of lowmem if we need to. Reported-by: Stanislaw Gruszka Signed-off-by: H. Peter Anvin Tested-by: Yinghai Lu LKML-Reference: <4D0AD3ED.8070607@kernel.org> --- arch/x86/boot/compressed/misc.c | 2 +- arch/x86/kernel/head_32.S | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 23f315c9f215..325c05294fc4 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -355,7 +355,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, if (heap > 0x3fffffffffffUL) error("Destination address too large"); #else - if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) + if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) error("Destination address too large"); #endif #ifndef CONFIG_RELOCATABLE diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index bcece91dd311..d7cdf5bc1e63 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -60,16 +60,18 @@ #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) #endif +/* Number of possible pages in the lowmem region */ +LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) + /* Enough space to fit pagetables for the low memory linear map */ -MAPPING_BEYOND_END = \ - PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT +MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT /* * Worst-case size of the kernel mapping we need to make: - * the worst-case size of the kernel itself, plus the extra we need - * to map for the linear map. + * a relocatable kernel can live anywhere in lowmem, so we need to be able + * to map all of lowmem. */ -KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT +KERNEL_PAGES = LOWMEM_PAGES INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm RESERVE_BRK(pagetables, INIT_MAP_SIZE) -- cgit v1.2.3