summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorAlex Gonzalez <alex.gonzalez@digi.com>2012-01-19 13:54:23 +0100
committerAlex Gonzalez <alex.gonzalez@digi.com>2012-01-19 13:54:23 +0100
commit802699c91a967767fc94759f7a3e5e82d8269245 (patch)
treec8b714dd25edd333efbbf8bb1eb6c3d379084cc4 /arch/x86
parentf135e68daa6745fd3dbb285e6161ae2758c4027f (diff)
parent675f7660ffb0e1880011f6b3c4f9ac241491e3cd (diff)
Merge commit 'v2.6.35.14' into del-5.8/main
Conflicts: arch/arm/plat-mxc/include/mach/gpio.h arch/x86/kernel/cpu/mtrr/main.c drivers/mmc/core/core.c drivers/net/smsc911x.c fs/proc/task_mmu.c include/linux/pm_runtime.h mm/memory.c mm/mlock.c Signed-off-by: Alex Gonzalez <alex.gonzalez@digi.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig16
-rw-r--r--arch/x86/ia32/ia32entry.S22
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/include/asm/amd_iommu_proto.h6
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h12
-rw-r--r--arch/x86/include/asm/apicdef.h1
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h121
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h44
-rw-r--r--arch/x86/include/asm/compat.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h3
-rw-r--r--arch/x86/include/asm/hpet.h1
-rw-r--r--arch/x86/include/asm/io.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h10
-rw-r--r--arch/x86/include/asm/kvm_host.h26
-rw-r--r--arch/x86/include/asm/mmu_context.h5
-rw-r--r--arch/x86/include/asm/msr-index.h11
-rw-r--r--arch/x86/include/asm/pgtable-3level.h11
-rw-r--r--arch/x86/include/asm/pgtable_32.h1
-rw-r--r--arch/x86/include/asm/processor.h42
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/asm/smp.h9
-rw-r--r--arch/x86/include/asm/smpboot_hooks.h2
-rw-r--r--arch/x86/include/asm/trampoline.h5
-rw-r--r--arch/x86/include/asm/tsc.h2
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c14
-rw-r--r--arch/x86/kernel/amd_iommu.c20
-rw-r--r--arch/x86/kernel/amd_iommu_init.c75
-rw-r--r--arch/x86/kernel/apic/apic.c10
-rw-r--r--arch/x86/kernel/apic/io_apic.c18
-rw-r--r--arch/x86/kernel/apic/probe_64.c7
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c1
-rw-r--r--arch/x86/kernel/cpu/amd.c97
-rw-r--r--arch/x86/kernel/cpu/common.c4
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c1
-rw-r--r--arch/x86/kernel/cpu/intel.c19
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c12
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c54
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c81
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c11
-rw-r--r--arch/x86/kernel/cpu/vmware.c10
-rw-r--r--arch/x86/kernel/crash_dump_64.c3
-rw-r--r--arch/x86/kernel/e820.c10
-rw-r--r--arch/x86/kernel/early-quirks.c39
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/head_32.S8
-rw-r--r--arch/x86/kernel/hpet.c33
-rw-r--r--arch/x86/kernel/hw_breakpoint.c4
-rw-r--r--arch/x86/kernel/microcode_amd.c65
-rw-r--r--arch/x86/kernel/microcode_intel.c16
-rw-r--r--arch/x86/kernel/mpparse.c16
-rw-r--r--arch/x86/kernel/olpc.c5
-rw-r--r--arch/x86/kernel/pci-gart_64.c9
-rw-r--r--arch/x86/kernel/process.c39
-rw-r--r--arch/x86/kernel/process_32.c1
-rw-r--r--arch/x86/kernel/process_64.c1
-rw-r--r--arch/x86/kernel/pvclock.c5
-rw-r--r--arch/x86/kernel/reboot.c18
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S2
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/smp.c15
-rw-r--r--arch/x86/kernel/smpboot.c136
-rw-r--r--arch/x86/kernel/trampoline.c17
-rw-r--r--arch/x86/kernel/traps.c1
-rw-r--r--arch/x86/kernel/tsc.c46
-rw-r--r--arch/x86/kernel/vm86_32.c10
-rw-r--r--arch/x86/kernel/xsave.c3
-rw-r--r--arch/x86/kvm/emulate.c13
-rw-r--r--arch/x86/kvm/i8259.c2
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/mmu.c8
-rw-r--r--arch/x86/kvm/paging_tmpl.h28
-rw-r--r--arch/x86/kvm/svm.c54
-rw-r--r--arch/x86/kvm/vmx.c43
-rw-r--r--arch/x86/kvm/x86.c27
-rw-r--r--arch/x86/lib/atomic64_386_32.S238
-rw-r--r--arch/x86/lib/copy_user_64.S4
-rw-r--r--arch/x86/lib/semaphore_32.S2
-rw-r--r--arch/x86/mm/fault.c11
-rw-r--r--arch/x86/mm/pgtable.c3
-rw-r--r--arch/x86/mm/srat_64.c8
-rw-r--r--arch/x86/oprofile/nmi_int.c44
-rw-r--r--arch/x86/oprofile/op_model_amd.c24
-rw-r--r--arch/x86/pci/acpi.c9
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/vdso/Makefile4
-rw-r--r--arch/x86/xen/enlighten.c6
-rw-r--r--arch/x86/xen/mmu.c10
-rw-r--r--arch/x86/xen/multicalls.c12
-rw-r--r--arch/x86/xen/smp.c6
-rw-r--r--arch/x86/xen/time.c41
97 files changed, 1268 insertions, 651 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dcb0593b4a66..d2b82109a3fa 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -247,6 +247,11 @@ config ARCH_HWEIGHT_CFLAGS
config KTIME_SCALAR
def_bool X86_32
+
+config ARCH_CPU_PROBE_RELEASE
+ def_bool y
+ depends on HOTPLUG_CPU
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -792,6 +797,17 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ default n
+ ---help---
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e790bc1fbfa3..4f5f71e03ce1 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -50,7 +50,12 @@
/*
* Reload arg registers from stack in case ptrace changed them.
* We don't reload %eax because syscall_trace_enter() returned
- * the value it wants us to use in the table lookup.
+ * the %rax value we should see. Instead, we just truncate that
+ * value to 32 bits again as we did on entry from user mode.
+ * If it's a new value set by user_regset during entry tracing,
+ * this matches the normal truncation of the user-mode value.
+ * If it's -1 to make us punt the syscall, then (u32)-1 is still
+ * an appropriately invalid value.
*/
.macro LOAD_ARGS32 offset, _r9=0
.if \_r9
@@ -60,6 +65,7 @@
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
+ movl %eax,%eax /* zero extension */
.endm
.macro CFI_STARTPROC32 simple
@@ -153,7 +159,7 @@ ENTRY(ia32_sysenter_target)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
sysenter_do_call:
IA32_ARG_FIXUP
@@ -195,7 +201,7 @@ sysexit_from_sys_call:
movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
call audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
@@ -248,7 +254,7 @@ sysenter_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
CFI_ENDPROC
@@ -314,7 +320,7 @@ ENTRY(ia32_cstar_target)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz cstar_tracesys
- cmpl $IA32_NR_syscalls-1,%eax
+ cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
cstar_do_call:
IA32_ARG_FIXUP 1
@@ -367,7 +373,7 @@ cstar_tracesys:
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST
xchgl %ebp,%r9d
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)
@@ -425,7 +431,7 @@ ENTRY(ia32_syscall)
orl $TS_COMPAT,TI_status(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz ia32_tracesys
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
ia32_do_call:
IA32_ARG_FIXUP
@@ -444,7 +450,7 @@ ia32_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
END(ia32_syscall)
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index aa2c39d968fc..27d837979e80 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -88,6 +88,7 @@ extern int acpi_disabled;
extern int acpi_pci_disabled;
extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
+extern int acpi_fix_pin2_polarity;
extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi;
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index d2544f1d705d..cb030374b90a 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -38,4 +38,10 @@ static inline void amd_iommu_stats_init(void) { }
#endif /* !CONFIG_AMD_IOMMU_STATS */
+static inline bool is_rd890_iommu(struct pci_dev *pdev)
+{
+ return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
+ (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
+}
+
#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 7014e88bc779..08616180deaf 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -368,6 +368,9 @@ struct amd_iommu {
/* capabilities of that IOMMU read from ACPI */
u32 cap;
+ /* flags read from acpi table */
+ u8 acpi_flags;
+
/*
* Capability pointer. There could be more than one IOMMU per PCI
* device function if there are more than one AMD IOMMU capability
@@ -411,6 +414,15 @@ struct amd_iommu {
/* default dma_ops domain for that IOMMU */
struct dma_ops_domain *default_dom;
+
+ /*
+ * This array is required to work around a potential BIOS bug.
+ * The BIOS may miss to restore parts of the PCI configuration
+ * space when the system resumes from S3. The result is that the
+ * IOMMU does not execute commands anymore which leads to system
+ * failure.
+ */
+ u32 cache_cfg[4];
};
/*
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 7fe3b3060f08..49d7c96791dc 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -78,6 +78,7 @@
#define APIC_DEST_LOGICAL 0x00800
#define APIC_DEST_PHYSICAL 0x00000
#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
#define APIC_DM_LOWEST 0x00100
#define APIC_DM_SMI 0x00200
#define APIC_DM_REMRD 0x00300
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 8859e12dd3cf..20955ea7bc12 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -27,20 +27,20 @@ struct __xchg_dummy {
switch (size) { \
case 1: \
asm volatile("xchgb %b0,%1" \
- : "=q" (__x) \
- : "m" (*__xg(ptr)), "0" (__x) \
+ : "=q" (__x), "+m" (*__xg(ptr)) \
+ : "0" (__x) \
: "memory"); \
break; \
case 2: \
asm volatile("xchgw %w0,%1" \
- : "=r" (__x) \
- : "m" (*__xg(ptr)), "0" (__x) \
+ : "=r" (__x), "+m" (*__xg(ptr)) \
+ : "0" (__x) \
: "memory"); \
break; \
case 4: \
asm volatile("xchgl %0,%1" \
- : "=r" (__x) \
- : "m" (*__xg(ptr)), "0" (__x) \
+ : "=r" (__x), "+m" (*__xg(ptr)) \
+ : "0" (__x) \
: "memory"); \
break; \
default: \
@@ -53,60 +53,33 @@ struct __xchg_dummy {
__xchg((v), (ptr), sizeof(*ptr))
/*
- * The semantics of XCHGCMP8B are a bit strange, this is why
- * there is a loop and the loading of %%eax and %%edx has to
- * be inside. This inlines well in most cases, the cached
- * cost is around ~38 cycles. (in the future we might want
- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
- * might have an implicit FPU-save as a cost, so it's not
- * clear which path to go.)
+ * CMPXCHG8B only writes to the target if we had the previous
+ * value in registers, otherwise it acts as a read and gives us the
+ * "new previous" value. That is why there is a loop. Preloading
+ * EDX:EAX is a performance optimization: in the common case it means
+ * we need only one locked operation.
*
- * cmpxchg8b must be used with the lock prefix here to allow
- * the instruction to be executed atomically, see page 3-102
- * of the instruction set reference 24319102.pdf. We need
- * the reader side to see the coherent 64bit value.
+ * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
+ * least an FPU save and/or %cr0.ts manipulation.
+ *
+ * cmpxchg8b must be used with the lock prefix here to allow the
+ * instruction to be executed atomically. We need to have the reader
+ * side to see the coherent 64bit value.
*/
-static inline void __set_64bit(unsigned long long *ptr,
- unsigned int low, unsigned int high)
+static inline void set_64bit(volatile u64 *ptr, u64 value)
{
+ u32 low = value;
+ u32 high = value >> 32;
+ u64 prev = *ptr;
+
asm volatile("\n1:\t"
- "movl (%0), %%eax\n\t"
- "movl 4(%0), %%edx\n\t"
- LOCK_PREFIX "cmpxchg8b (%0)\n\t"
+ LOCK_PREFIX "cmpxchg8b %0\n\t"
"jnz 1b"
- : /* no outputs */
- : "D"(ptr),
- "b"(low),
- "c"(high)
- : "ax", "dx", "memory");
-}
-
-static inline void __set_64bit_constant(unsigned long long *ptr,
- unsigned long long value)
-{
- __set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32));
-}
-
-#define ll_low(x) *(((unsigned int *)&(x)) + 0)
-#define ll_high(x) *(((unsigned int *)&(x)) + 1)
-
-static inline void __set_64bit_var(unsigned long long *ptr,
- unsigned long long value)
-{
- __set_64bit(ptr, ll_low(value), ll_high(value));
+ : "=m" (*ptr), "+A" (prev)
+ : "b" (low), "c" (high)
+ : "memory");
}
-#define set_64bit(ptr, value) \
- (__builtin_constant_p((value)) \
- ? __set_64bit_constant((ptr), (value)) \
- : __set_64bit_var((ptr), (value)))
-
-#define _set_64bit(ptr, value) \
- (__builtin_constant_p(value) \
- ? __set_64bit(ptr, (unsigned int)(value), \
- (unsigned int)((value) >> 32)) \
- : __set_64bit(ptr, ll_low((value)), ll_high((value))))
-
extern void __cmpxchg_wrong_size(void);
/*
@@ -121,21 +94,21 @@ extern void __cmpxchg_wrong_size(void);
__typeof__(*(ptr)) __new = (new); \
switch (size) { \
case 1: \
- asm volatile(lock "cmpxchgb %b1,%2" \
- : "=a"(__ret) \
- : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ asm volatile(lock "cmpxchgb %b2,%1" \
+ : "=a" (__ret), "+m" (*__xg(ptr)) \
+ : "q" (__new), "0" (__old) \
: "memory"); \
break; \
case 2: \
- asm volatile(lock "cmpxchgw %w1,%2" \
- : "=a"(__ret) \
- : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ asm volatile(lock "cmpxchgw %w2,%1" \
+ : "=a" (__ret), "+m" (*__xg(ptr)) \
+ : "r" (__new), "0" (__old) \
: "memory"); \
break; \
case 4: \
- asm volatile(lock "cmpxchgl %1,%2" \
- : "=a"(__ret) \
- : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ asm volatile(lock "cmpxchgl %2,%1" \
+ : "=a" (__ret), "+m" (*__xg(ptr)) \
+ : "r" (__new), "0" (__old) \
: "memory"); \
break; \
default: \
@@ -180,12 +153,12 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr,
unsigned long long new)
{
unsigned long long prev;
- asm volatile(LOCK_PREFIX "cmpxchg8b %3"
- : "=A"(prev)
- : "b"((unsigned long)new),
- "c"((unsigned long)(new >> 32)),
- "m"(*__xg(ptr)),
- "0"(old)
+ asm volatile(LOCK_PREFIX "cmpxchg8b %1"
+ : "=A" (prev),
+ "+m" (*__xg(ptr))
+ : "b" ((unsigned long)new),
+ "c" ((unsigned long)(new >> 32)),
+ "0" (old)
: "memory");
return prev;
}
@@ -195,12 +168,12 @@ static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
unsigned long long new)
{
unsigned long long prev;
- asm volatile("cmpxchg8b %3"
- : "=A"(prev)
- : "b"((unsigned long)new),
- "c"((unsigned long)(new >> 32)),
- "m"(*__xg(ptr)),
- "0"(old)
+ asm volatile("cmpxchg8b %1"
+ : "=A" (prev),
+ "+m" (*__xg(ptr))
+ : "b" ((unsigned long)new),
+ "c" ((unsigned long)(new >> 32)),
+ "0" (old)
: "memory");
return prev;
}
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 485ae415faec..9596e7c61960 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -5,13 +5,11 @@
#define __xg(x) ((volatile long *)(x))
-static inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
+static inline void set_64bit(volatile u64 *ptr, u64 val)
{
*ptr = val;
}
-#define _set_64bit set_64bit
-
extern void __xchg_wrong_size(void);
extern void __cmpxchg_wrong_size(void);
@@ -26,26 +24,26 @@ extern void __cmpxchg_wrong_size(void);
switch (size) { \
case 1: \
asm volatile("xchgb %b0,%1" \
- : "=q" (__x) \
- : "m" (*__xg(ptr)), "0" (__x) \
+ : "=q" (__x), "+m" (*__xg(ptr)) \
+ : "0" (__x) \
: "memory"); \
break; \
case 2: \
asm volatile("xchgw %w0,%1" \
- : "=r" (__x) \
- : "m" (*__xg(ptr)), "0" (__x) \
+ : "=r" (__x), "+m" (*__xg(ptr)) \
+ : "0" (__x) \
: "memory"); \
break; \
case 4: \
asm volatile("xchgl %k0,%1" \
- : "=r" (__x) \
- : "m" (*__xg(ptr)), "0" (__x) \
+ : "=r" (__x), "+m" (*__xg(ptr)) \
+ : "0" (__x) \
: "memory"); \
break; \
case 8: \
asm volatile("xchgq %0,%1" \
- : "=r" (__x) \
- : "m" (*__xg(ptr)), "0" (__x) \
+ : "=r" (__x), "+m" (*__xg(ptr)) \
+ : "0" (__x) \
: "memory"); \
break; \
default: \
@@ -71,27 +69,27 @@ extern void __cmpxchg_wrong_size(void);
__typeof__(*(ptr)) __new = (new); \
switch (size) { \
case 1: \
- asm volatile(lock "cmpxchgb %b1,%2" \
- : "=a"(__ret) \
- : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ asm volatile(lock "cmpxchgb %b2,%1" \
+ : "=a" (__ret), "+m" (*__xg(ptr)) \
+ : "q" (__new), "0" (__old) \
: "memory"); \
break; \
case 2: \
- asm volatile(lock "cmpxchgw %w1,%2" \
- : "=a"(__ret) \
- : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ asm volatile(lock "cmpxchgw %w2,%1" \
+ : "=a" (__ret), "+m" (*__xg(ptr)) \
+ : "r" (__new), "0" (__old) \
: "memory"); \
break; \
case 4: \
- asm volatile(lock "cmpxchgl %k1,%2" \
- : "=a"(__ret) \
- : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ asm volatile(lock "cmpxchgl %k2,%1" \
+ : "=a" (__ret), "+m" (*__xg(ptr)) \
+ : "r" (__new), "0" (__old) \
: "memory"); \
break; \
case 8: \
- asm volatile(lock "cmpxchgq %1,%2" \
- : "=a"(__ret) \
- : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \
+ asm volatile(lock "cmpxchgq %2,%1" \
+ : "=a" (__ret), "+m" (*__xg(ptr)) \
+ : "r" (__new), "0" (__old) \
: "memory"); \
break; \
default: \
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 306160e58b48..1d9cd27c2920 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -205,7 +205,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = task_pt_regs(current);
return (void __user *)regs->sp - len;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 468145914389..429e4f485746 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -150,7 +150,7 @@
#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */
+#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */
#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
@@ -162,6 +162,7 @@
#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */
#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */
#define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
/* Virtualization flags: Linux defined */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 004e6e25e913..1d5c08a1bdfd 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -68,7 +68,6 @@ extern unsigned long force_hpet_address;
extern u8 hpet_blockid;
extern int hpet_force_user;
extern u8 hpet_msi_disable;
-extern u8 hpet_readback_cmp;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
extern void hpet_disable(void);
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 30a3e9776123..6a45ec41ec26 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -206,6 +206,7 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
extern void iounmap(volatile void __iomem *addr);
+extern void set_iounmap_nonlazy(void);
#ifdef __KERNEL__
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0b2729bf2070..50de7b4b2db1 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -143,7 +143,15 @@ struct x86_emulate_ops {
struct operand {
enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
unsigned int bytes;
- unsigned long val, orig_val, *ptr;
+ union {
+ unsigned long orig_val;
+ u64 orig_val64;
+ };
+ unsigned long *ptr;
+ union {
+ unsigned long val;
+ u64 val64;
+ };
};
struct fetch_cache {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 76f5483cffec..535c3738ef6b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -77,7 +77,7 @@
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 40
+#define KVM_MAX_CPUID_ENTRIES 80
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
@@ -673,20 +673,6 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
return (struct kvm_mmu_page *)page_private(page);
}
-static inline u16 kvm_read_fs(void)
-{
- u16 seg;
- asm("mov %%fs, %0" : "=g"(seg));
- return seg;
-}
-
-static inline u16 kvm_read_gs(void)
-{
- u16 seg;
- asm("mov %%gs, %0" : "=g"(seg));
- return seg;
-}
-
static inline u16 kvm_read_ldt(void)
{
u16 ldt;
@@ -694,16 +680,6 @@ static inline u16 kvm_read_ldt(void)
return ldt;
}
-static inline void kvm_load_fs(u16 sel)
-{
- asm("mov %0, %%fs" : : "rm"(sel));
-}
-
-static inline void kvm_load_gs(u16 sel)
-{
- asm("mov %0, %%gs" : : "rm"(sel));
-}
-
static inline void kvm_load_ldt(u16 sel)
{
asm("lldt %0" : : "rm"(sel));
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 4a2d4e0c18d9..8b5393ec1080 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -36,8 +36,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
- /* stop flush ipis for the previous mm */
- cpumask_clear_cpu(cpu, mm_cpumask(prev));
#ifdef CONFIG_SMP
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
@@ -47,6 +45,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
/* Re-load page tables */
load_cr3(next->pgd);
+ /* stop flush ipis for the previous mm */
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+
/*
* load the LDT, if the LDT is different:
*/
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 8c7ae4318629..692c73bdc3fe 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -85,11 +85,15 @@
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403
+#define MSR_AMD64_MC0_MASK 0xc0010044
+
#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
+#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
+
/* These are consecutive and not in the normal 4er MCE bank block */
#define MSR_IA32_MC0_CTL2 0x00000280
#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
@@ -227,6 +231,11 @@
#define THERM_INT_LOW_ENABLE (1 << 0)
#define THERM_INT_HIGH_ENABLE (1 << 1)
+#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
+#define ENERGY_PERF_BIAS_PERFORMANCE 0
+#define ENERGY_PERF_BIAS_NORMAL 6
+#define ENERGY_PERF_BIAS_POWERSWAVE 15
+
#define MSR_IA32_THERM_STATUS 0x0000019c
#define THERM_STATUS_PROCHOT (1 << 0)
@@ -239,6 +248,8 @@
#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2
+#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
+
/* MISC_ENABLE bits: architectural */
#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0)
#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 177b0165ea01..33927d283c92 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -69,8 +69,6 @@ static inline void native_pmd_clear(pmd_t *pmd)
static inline void pud_clear(pud_t *pudp)
{
- unsigned long pgd;
-
set_pud(pudp, __pud(0));
/*
@@ -79,13 +77,10 @@ static inline void pud_clear(pud_t *pudp)
* section 8.1: in PAE mode we explicitly have to flush the
* TLB via cr3 if the top-level pgd is changed...
*
- * Make sure the pud entry we're updating is within the
- * current pgd to avoid unnecessary TLB flushes.
+ * Currently all places where pud_clear() is called either have
+ * flush_tlb_mm() followed or don't need TLB flush (x86_64 code or
+ * pud_clear_bad()), so we don't need TLB flush here.
*/
- pgd = read_cr3();
- if (__pa(pudp) >= pgd && __pa(pudp) <
- (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
- write_cr3(pgd);
}
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 2984a25ff383..f686f49e8b7b 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -26,6 +26,7 @@ struct mm_struct;
struct vm_area_struct;
extern pgd_t swapper_pg_dir[1024];
+extern pgd_t trampoline_pg_dir[1024];
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7e5c6a60b8ee..2fe362c72eb0 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -763,29 +763,6 @@ extern unsigned long boot_option_idle_override;
extern unsigned long idle_halt;
extern unsigned long idle_nomwait;
-/*
- * on systems with caches, caches must be flashed as the absolute
- * last instruction before going into a suspended halt. Otherwise,
- * dirty data can linger in the cache and become stale on resume,
- * leading to strange errors.
- *
- * perform a variety of operations to guarantee that the compiler
- * will not reorder instructions. wbinvd itself is serializing
- * so the processor will not reorder.
- *
- * Systems without cache can just go into halt.
- */
-static inline void wbinvd_halt(void)
-{
- mb();
- /* check for clflush to determine if wbinvd is legal */
- if (cpu_has_clflush)
- asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
- else
- while (1)
- halt();
-}
-
extern void enable_sep_cpu(void);
extern int sysenter_setup(void);
@@ -1025,4 +1002,23 @@ unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
return ratio;
}
+/*
+ * AMD errata checking
+ */
+#ifdef CONFIG_CPU_SUP_AMD
+extern const int amd_erratum_400[];
+extern bool cpu_has_amd_erratum(const int *);
+
+#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
+#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
+ ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
+
+#else
+#define cpu_has_amd_erratum(x) (false)
+#endif /* CONFIG_CPU_SUP_AMD */
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index cd02f324aa6b..6226870d11cd 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -11,5 +11,6 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
struct pvclock_vcpu_time_info *vcpu,
struct timespec *ts);
+void pvclock_resume(void);
#endif /* _ASM_X86_PVCLOCK_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4cfc90824068..4c2f63c7fc1b 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -50,7 +50,7 @@ struct smp_ops {
void (*smp_prepare_cpus)(unsigned max_cpus);
void (*smp_cpus_done)(unsigned max_cpus);
- void (*smp_send_stop)(void);
+ void (*stop_other_cpus)(int wait);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu);
@@ -73,7 +73,12 @@ extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
{
- smp_ops.smp_send_stop();
+ smp_ops.stop_other_cpus(0);
+}
+
+static inline void stop_other_cpus(void)
+{
+ smp_ops.stop_other_cpus(1);
}
static inline void smp_prepare_boot_cpu(void)
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 1def60114906..cfdc6c88c9d5 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -34,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
*/
CMOS_WRITE(0, 0xf);
- *((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0;
+ *((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0;
}
static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index cb507bb05d79..4dde797c0578 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -13,14 +13,17 @@ extern unsigned char *trampoline_base;
extern unsigned long init_rsp;
extern unsigned long initial_code;
+extern unsigned long initial_page_table;
extern unsigned long initial_gs;
#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
extern unsigned long setup_trampoline(void);
+extern void __init setup_trampoline_page_table(void);
extern void __init reserve_trampoline_memory(void);
#else
-static inline void reserve_trampoline_memory(void) {};
+static inline void setup_trampoline_page_table(void) {}
+static inline void reserve_trampoline_memory(void) {}
#endif /* CONFIG_X86_TRAMPOLINE */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index c0427295e8f5..1ca132fc0d03 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -59,5 +59,7 @@ extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void);
extern int notsc_setup(char *);
+extern void save_sched_clock_state(void);
+extern void restore_sched_clock_state(void);
#endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index abd3e0ea762a..99f0ad753f32 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -42,7 +42,7 @@
* Returns 0 if the range is valid, nonzero otherwise.
*
* This is equivalent to the following test:
- * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
+ * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64)
*
* This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
*/
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e77b22083721..0357c514fc7f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -11,6 +11,8 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
+CFLAGS_REMOVE_pvclock.o = -pg
+CFLAGS_REMOVE_kvmclock.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c05872aa3ce0..510bc6db1bd3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -72,6 +72,7 @@ u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
int acpi_skip_timer_override __initdata;
int acpi_use_timer_override __initdata;
+int acpi_fix_pin2_polarity __initdata;
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -410,10 +411,15 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
return 0;
}
- if (acpi_skip_timer_override &&
- intsrc->source_irq == 0 && intsrc->global_irq == 2) {
- printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
- return 0;
+ if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
+ if (acpi_skip_timer_override) {
+ printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+ return 0;
+ }
+ if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
+ intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
+ printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
+ }
}
mp_override_legacy_irq(intsrc->source_irq,
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0d20286d78c6..e26ac4035754 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -25,6 +25,7 @@
#include <linux/dma-mapping.h>
#include <linux/iommu-helper.h>
#include <linux/iommu.h>
+#include <asm/msidef.h>
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
@@ -1048,7 +1049,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
{
int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
struct amd_iommu *iommu;
- unsigned long i;
+ unsigned long i, old_size;
#ifdef CONFIG_IOMMU_STRESS
populate = false;
@@ -1084,8 +1085,21 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
}
}
+ old_size = dma_dom->aperture_size;
dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+ /* Reserve address range used for MSI messages */
+ if (old_size < MSI_ADDR_BASE_LO &&
+ dma_dom->aperture_size > MSI_ADDR_BASE_LO) {
+ unsigned long spage;
+ int pages;
+
+ pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE);
+ spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT;
+
+ dma_ops_reserve_addresses(dma_dom, spage, pages);
+ }
+
/* Intialize the exclusion range if necessary */
for_each_iommu(iommu) {
if (iommu->exclusion_start &&
@@ -1953,6 +1967,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
size_t size,
int dir)
{
+ dma_addr_t flush_addr;
dma_addr_t i, start;
unsigned int pages;
@@ -1960,6 +1975,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
(dma_addr + size > dma_dom->aperture_size))
return;
+ flush_addr = dma_addr;
pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
dma_addr &= PAGE_MASK;
start = dma_addr;
@@ -1974,7 +1990,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
dma_ops_free_addresses(dma_dom, dma_addr, pages);
if (amd_iommu_unmap_flush || dma_dom->need_flush) {
- iommu_flush_pages(&dma_dom->domain, dma_addr, size);
+ iommu_flush_pages(&dma_dom->domain, flush_addr, size);
dma_dom->need_flush = false;
}
}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3cc63e2b8dd4..82cbee9547c7 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -632,6 +632,13 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
iommu->last_device = calc_devid(MMIO_GET_BUS(range),
MMIO_GET_LD(range));
iommu->evt_msi_num = MMIO_MSI_NUM(misc);
+
+ if (is_rd890_iommu(iommu->dev)) {
+ pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]);
+ pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]);
+ pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]);
+ pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]);
+ }
}
/*
@@ -643,35 +650,15 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
{
u8 *p = (u8 *)h;
u8 *end = p, flags = 0;
- u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
- u32 ext_flags = 0;
+ u16 devid = 0, devid_start = 0, devid_to = 0;
+ u32 dev_i, ext_flags = 0;
bool alias = false;
struct ivhd_entry *e;
/*
- * First set the recommended feature enable bits from ACPI
- * into the IOMMU control registers
+ * First save the recommended feature enable bits from ACPI
*/
- h->flags & IVHD_FLAG_HT_TUN_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
- iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
-
- h->flags & IVHD_FLAG_PASSPW_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
- iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
-
- h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
- iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
-
- h->flags & IVHD_FLAG_ISOC_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
- iommu_feature_disable(iommu, CONTROL_ISOC_EN);
-
- /*
- * make IOMMU memory accesses cache coherent
- */
- iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+ iommu->acpi_flags = h->flags;
/*
* Done. Now parse the device entries
@@ -819,7 +806,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
/* Initializes the device->iommu mapping for the driver */
static int __init init_iommu_devices(struct amd_iommu *iommu)
{
- u16 i;
+ u32 i;
for (i = iommu->first_device; i <= iommu->last_device; ++i)
set_iommu_for_device(iommu, i);
@@ -1108,7 +1095,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
*/
static void init_device_table(void)
{
- u16 devid;
+ u32 devid;
for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
set_dev_entry_bit(devid, DEV_ENTRY_VALID);
@@ -1116,6 +1103,40 @@ static void init_device_table(void)
}
}
+static void iommu_init_flags(struct amd_iommu *iommu)
+{
+ iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
+ iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
+
+ iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
+ iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
+
+ iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
+ iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
+
+ iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
+ iommu_feature_disable(iommu, CONTROL_ISOC_EN);
+
+ /*
+ * make IOMMU memory accesses cache coherent
+ */
+ iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+}
+
+static void iommu_apply_quirks(struct amd_iommu *iommu)
+{
+ if (is_rd890_iommu(iommu->dev)) {
+ pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]);
+ pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]);
+ pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]);
+ pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]);
+ }
+}
+
/*
* This function finally enables all IOMMUs found in the system after
* they have been initialized
@@ -1126,6 +1147,8 @@ static void enable_iommus(void)
for_each_iommu(iommu) {
iommu_disable(iommu);
+ iommu_apply_quirks(iommu);
+ iommu_init_flags(iommu);
iommu_set_device_table(iommu);
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a96489ee6cab..6583884a83fb 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1340,6 +1340,14 @@ void __cpuinit end_local_APIC_setup(void)
setup_apic_nmi_watchdog(NULL);
apic_pm_activate();
+
+ /*
+ * Now that local APIC setup is completed for BP, configure the fault
+ * handling for interrupt remapping.
+ */
+ if (!smp_processor_id() && intr_remapping_enabled)
+ enable_drhd_fault_handling();
+
}
#ifdef CONFIG_X86_X2APIC
@@ -1606,7 +1614,7 @@ void __init init_apic_mappings(void)
* acpi lapic path already maps that address in
* acpi_register_lapic_address()
*/
- if (!acpi_lapic)
+ if (!acpi_lapic && !smp_found_config)
set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e41ed24ab26d..4d90327853b7 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -306,14 +306,19 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
old_cfg = old_desc->chip_data;
- memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+ cfg->vector = old_cfg->vector;
+ cfg->move_in_progress = old_cfg->move_in_progress;
+ cpumask_copy(cfg->domain, old_cfg->domain);
+ cpumask_copy(cfg->old_domain, old_cfg->old_domain);
init_copy_irq_2_pin(old_cfg, cfg, node);
}
-static void free_irq_cfg(struct irq_cfg *old_cfg)
+static void free_irq_cfg(struct irq_cfg *cfg)
{
- kfree(old_cfg);
+ free_cpumask_var(cfg->domain);
+ free_cpumask_var(cfg->old_domain);
+ kfree(cfg);
}
void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
@@ -1392,6 +1397,7 @@ int setup_ioapic_entry(int apic_id, int irq,
irte.dlvry_mode = apic->irq_delivery_mode;
irte.vector = vector;
irte.dest_id = IRTE_DEST(destination);
+ irte.redir_hint = 1;
/* Set source-id of interrupt request */
set_ioapic_sid(&irte, apic_id);
@@ -1728,6 +1734,8 @@ __apicdebuginit(void) print_IO_APIC(void)
struct irq_pin_list *entry;
cfg = desc->chip_data;
+ if (!cfg)
+ continue;
entry = cfg->irq_2_pin;
if (!entry)
continue;
@@ -3341,6 +3349,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
irte.dlvry_mode = apic->irq_delivery_mode;
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
+ irte.redir_hint = 1;
/* Set source-id of interrupt request */
if (pdev)
@@ -3397,7 +3406,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
cfg = desc->chip_data;
- read_msi_msg_desc(desc, &msg);
+ get_cached_msi_msg_desc(desc, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
@@ -3617,6 +3626,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+ msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
dmar_msi_write(irq, &msg);
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 83e9be4778e2..fac49a845064 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -76,13 +76,6 @@ void __init default_setup_apic_routing(void)
/* need to update phys_pkg_id */
apic->phys_pkg_id = apicid_phys_pkg_id;
}
-
- /*
- * Now that apic routing model is selected, configure the
- * fault handling for intr remapping.
- */
- if (intr_remapping_enabled)
- enable_drhd_fault_handling();
}
/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 10fa5684a662..7369b4c2c55a 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -33,6 +33,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
{ X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006 },
+ { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007 },
{ X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a },
{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a },
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e485825130d2..29dfd22ab642 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -305,8 +305,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
/* fixup topology information on multi-node processors */
- if ((c->x86 == 0x10) && (c->x86_model == 9))
- amd_fixup_dcm(c);
+ amd_fixup_dcm(c);
#endif
}
@@ -565,6 +564,35 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}
#endif
+
+ /*
+ * Family 0x12 and above processors have APIC timer
+ * running in deep C states.
+ */
+ if (c->x86 > 0x11)
+ set_cpu_cap(c, X86_FEATURE_ARAT);
+
+ /*
+ * Disable GART TLB Walk Errors on Fam10h. We do this here
+ * because this is always needed when GART is enabled, even in a
+ * kernel which has no MCE support built in.
+ */
+ if (c->x86 == 0x10) {
+ /*
+ * BIOS should disable GartTlbWlk Errors themself. If
+ * it doesn't do it here as suggested by the BKDG.
+ *
+ * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
+ */
+ u64 mask;
+ int err;
+
+ err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask);
+ if (err == 0) {
+ mask |= (1 << 10);
+ checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+ }
+ }
}
#ifdef CONFIG_X86_32
@@ -609,3 +637,68 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
};
cpu_dev_register(amd_cpu_dev);
+
+/*
+ * AMD errata checking
+ *
+ * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
+ * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
+ * have an OSVW id assigned, which it takes as first argument. Both take a
+ * variable number of family-specific model-stepping ranges created by
+ * AMD_MODEL_RANGE(). Each erratum also has to be declared as extern const
+ * int[] in arch/x86/include/asm/processor.h.
+ *
+ * Example:
+ *
+ * const int amd_erratum_319[] =
+ * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
+ * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
+ * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
+ */
+
+const int amd_erratum_400[] =
+ AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+ AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
+
+
+bool cpu_has_amd_erratum(const int *erratum)
+{
+ struct cpuinfo_x86 *cpu = &current_cpu_data;
+ int osvw_id = *erratum++;
+ u32 range;
+ u32 ms;
+
+ /*
+ * If called early enough that current_cpu_data hasn't been initialized
+ * yet, fall back to boot_cpu_data.
+ */
+ if (cpu->x86 == 0)
+ cpu = &boot_cpu_data;
+
+ if (cpu->x86_vendor != X86_VENDOR_AMD)
+ return false;
+
+ if (osvw_id >= 0 && osvw_id < 65536 &&
+ cpu_has(cpu, X86_FEATURE_OSVW)) {
+ u64 osvw_len;
+
+ rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
+ if (osvw_id < osvw_len) {
+ u64 osvw_bits;
+
+ rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
+ osvw_bits);
+ return osvw_bits & (1ULL << (osvw_id & 0x3f));
+ }
+ }
+
+ /* OSVW unavailable or ID unknown, match family-model-stepping range */
+ ms = (cpu->x86_model << 4) | cpu->x86_mask;
+ while ((range = *erratum++))
+ if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
+ (ms >= AMD_MODEL_RANGE_START(range)) &&
+ (ms <= AMD_MODEL_RANGE_END(range)))
+ return true;
+
+ return false;
+}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 68e4a6f2211e..d938871e3d83 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -537,7 +537,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
}
}
-static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
+void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 tfms, xlvl;
u32 ebx;
@@ -576,6 +576,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
+ init_scattered_cpuid_features(c);
}
static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -731,7 +732,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
get_model_name(c); /* Default name */
- init_scattered_cpuid_features(c);
detect_nopl(c);
}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 3624e8a0f71b..f668bb1f7d43 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -33,5 +33,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern void get_cpu_cap(struct cpuinfo_x86 *c);
#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 1d3cddaa40ee..5384b0418428 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -704,6 +704,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
per_cpu(acfreq_data, policy->cpu) = NULL;
acpi_processor_unregister_performance(data->acpi_data,
policy->cpu);
+ kfree(data->freq_table);
kfree(data);
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 85f69cdeae10..8e82a525b80c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -39,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
c->cpuid_level = cpuid_eax(0);
+ get_cpu_cap(c);
}
}
@@ -449,6 +450,24 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_VMX))
detect_vmx_virtcap(c);
+
+ /*
+ * Initialize MSR_IA32_ENERGY_PERF_BIAS if BIOS did not.
+ * x86_energy_perf_policy(8) is available to change it at run-time
+ */
+ if (cpu_has(c, X86_FEATURE_EPB)) {
+ u64 epb;
+
+ rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
+ if ((epb & 0xF) == 0) {
+ printk_once(KERN_WARNING, "x86: updated energy_perf_bias"
+ " to 'normal' from 'performance'\n"
+ "You can view and update epb via utility,"
+ " such as x86_energy_perf_policy(8)\n");
+ epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
+ wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
+ }
+ }
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 224392d8fe8c..1bdce3410b5e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -141,6 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
address = (low & MASK_BLKPTR_LO) >> 21;
if (!address)
break;
+
address += MCG_XBLK_ADDR;
} else
++address;
@@ -148,12 +149,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (rdmsr_safe(address, &low, &high))
break;
- if (!(high & MASK_VALID_HI)) {
- if (block)
- continue;
- else
- break;
- }
+ if (!(high & MASK_VALID_HI))
+ continue;
if (!(high & MASK_CNTP_HI) ||
(high & MASK_LOCKED_HI))
@@ -472,6 +469,7 @@ recurse:
out_free:
if (b) {
kobject_put(&b->kobj);
+ list_del(&b->miscj);
kfree(b);
}
return err;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index e1a0a3bf9716..22100c649e74 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -293,18 +293,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+ h = lvtthmr_init;
/*
* The initial value of thermal LVT entries on all APs always reads
* 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
* sequence to them and LVT registers are reset to 0s except for
* the mask bits which are set to 1s when APs receive INIT IPI.
- * Always restore the value that BIOS has programmed on AP based on
- * BSP's info we saved since BIOS is always setting the same value
- * for all threads/cores
+ * If BIOS takes over the thermal interrupt and sets its interrupt
+ * delivery mode to SMI (not fixed), it restores the value that the
+ * BIOS has programmed on AP based on BSP's info we saved since BIOS
+ * is always setting the same value for all threads/cores.
*/
- apic_write(APIC_LVTTHMR, lvtthmr_init);
+ if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
+ apic_write(APIC_LVTTHMR, lvtthmr_init);
- h = lvtthmr_init;
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 06130b52f012..a67038401129 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -827,7 +827,7 @@ int __init amd_special_default_mtrr(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return 0;
- if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
+ if (boot_cpu_data.x86 < 0xf)
return 0;
/* In case some hypervisor doesn't pass SYSCFG through: */
if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 01c0f3ee6cc3..675ac513dbe2 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -247,6 +247,25 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
unsigned long flags;
int cpu;
+#ifdef CONFIG_SMP
+ /*
+ * If this cpu is not yet active, we are in the cpu online path. There
+ * can be no stop_machine() in parallel, as stop machine ensures this
+ * by using get_online_cpus(). We can skip taking the stop_cpus_mutex,
+ * as we don't need it and also we can't afford to block while waiting
+ * for the mutex.
+ *
+ * If this cpu is active, we need to prevent stop_machine() happening
+ * in parallel by taking the stop cpus mutex.
+ *
+ * Also, this is called in the context of cpu online path or in the
+ * context where cpu hotplug is prevented. So checking the active status
+ * of the raw_smp_processor_id() is safe.
+ */
+ if (cpu_active(raw_smp_processor_id()))
+ mutex_lock(&stop_cpus_mutex);
+#endif
+
preempt_disable();
data.smp_reg = reg;
@@ -292,14 +311,24 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
/*
* HACK!
- * We use this same function to initialize the mtrrs on boot.
- * The state of the boot cpu's mtrrs has been saved, and we want
- * to replicate across all the APs.
- * If we're doing that @reg is set to something special...
+ *
+ * We use this same function to initialize the mtrrs during boot,
+ * resume, runtime cpu online and on an explicit request to set a
+ * specific MTRR.
+ *
+ * During boot or suspend, the state of the boot cpu's mtrrs has been
+ * saved, and we want to replicate that across all the cpus that come
+ * online (either at the end of boot or resume or during a runtime cpu
+ * online). If we're doing that, @reg is set to something special and on
+ * this cpu we still do mtrr_if->set_all(). During boot/resume, this
+ * is unnecessary if at this point we are still on the cpu that started
+ * the boot/resume sequence. But there is no guarantee that we are still
+ * on the same cpu. So we do mtrr_if->set_all() on this cpu aswell to be
+ * sure that we are in sync with everyone else.
*/
if (reg != ~0U)
mtrr_if->set(reg, base, size, type);
- else if (!mtrr_aps_delayed_init)
+ else
mtrr_if->set_all();
/* Wait for the others */
@@ -319,6 +348,11 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
local_irq_restore(flags);
preempt_enable();
+
+#ifdef CONFIG_SMP
+ if (cpu_active(raw_smp_processor_id()))
+ mutex_unlock(&stop_cpus_mutex);
+#endif
}
/**
@@ -793,13 +827,21 @@ void set_mtrr_aps_delayed_init(void)
}
/*
- * MTRR initialization for all AP's
+ * Delayed MTRR initialization for all AP's
*/
void mtrr_aps_init(void)
{
if (!use_intel())
return;
+ /*
+ * Check if someone has requested the delay of AP MTRR initialization,
+ * by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
+ * then we are done.
+ */
+ if (!mtrr_aps_delayed_init)
+ return;
+
set_mtrr(~0U, 0, 0, 0);
mtrr_aps_delayed_init = false;
}
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c2897b7b4a3b..46d58448c3af 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
- [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
+ [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
- [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
+ [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 214ac860ebe0..d8d86d014008 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
* Intel Errata AAP53 (model 30)
* Intel Errata BD53 (model 44)
*
- * These chips need to be 'reset' when adding counters by programming
- * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
- * either in sequence on the same PMC or on different PMCs.
+ * The official story:
+ * These chips need to be 'reset' when adding counters by programming the
+ * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ * in sequence on the same PMC or on different PMCs.
+ *
+ * In practise it appears some of these events do in fact count, and
+ * we need to programm all 4 events.
*/
-static void intel_pmu_nhm_enable_all(int added)
+static void intel_pmu_nhm_workaround(void)
{
- if (added) {
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- int i;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ static const unsigned long nhm_magic[4] = {
+ 0x4300B5,
+ 0x4300D2,
+ 0x4300B1,
+ 0x4300B1
+ };
+ struct perf_event *event;
+ int i;
+
+ /*
+ * The Errata requires below steps:
+ * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+ * 2) Configure 4 PERFEVTSELx with the magic events and clear
+ * the corresponding PMCx;
+ * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+ * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+ * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
+ */
+
+ /*
+ * The real steps we choose are a little different from above.
+ * A) To reduce MSR operations, we don't run step 1) as they
+ * are already cleared before this function is called;
+ * B) Call x86_perf_event_update to save PMCx before configuring
+ * PERFEVTSELx with magic number;
+ * C) With step 5), we do clear only when the PERFEVTSELx is
+ * not used currently.
+ * D) Call x86_perf_event_set_period to restore PMCx;
+ */
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+ /* We always operate 4 pairs of PERF Counters */
+ for (i = 0; i < 4; i++) {
+ event = cpuc->events[i];
+ if (event)
+ x86_perf_event_update(event);
+ }
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+ for (i = 0; i < 4; i++) {
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+ wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+ }
- for (i = 0; i < 3; i++) {
- struct perf_event *event = cpuc->events[i];
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
- if (!event)
- continue;
+ for (i = 0; i < 4; i++) {
+ event = cpuc->events[i];
+ if (event) {
+ x86_perf_event_set_period(event);
__x86_pmu_enable_event(&event->hw,
- ARCH_PERFMON_EVENTSEL_ENABLE);
- }
+ ARCH_PERFMON_EVENTSEL_ENABLE);
+ } else
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
}
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+ if (added)
+ intel_pmu_nhm_workaround();
intel_pmu_enable_all(added);
}
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae85d69644d1..a187365d98b4 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -457,6 +457,8 @@ static int p4_hw_config(struct perf_event *event)
event->hw.config |= event->attr.config &
(p4_config_pack_escr(P4_ESCR_MASK_HT) |
p4_config_pack_cccr(P4_CCCR_MASK_HT));
+
+ event->hw.config &= ~P4_CCCR_FORCE_OVF;
}
rc = x86_setup_perfctr(event);
@@ -581,6 +583,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ int overflow;
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -591,12 +594,14 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
WARN_ON_ONCE(hwc->idx != idx);
/* it might be unflagged overflow */
- handled = p4_pmu_clear_cccr_ovf(hwc);
+ overflow = p4_pmu_clear_cccr_ovf(hwc);
val = x86_perf_event_update(event);
- if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
+ if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
continue;
+ handled += overflow;
+
/* event overflow for sure */
data.period = event->hw.last_period;
@@ -612,7 +617,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
inc_irq_stat(apic_perf_irqs);
}
- return handled;
+ return handled > 0;
}
/*
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index b9d1ff588445..ce9c6c28de77 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -23,6 +23,7 @@
#include <linux/dmi.h>
#include <linux/module.h>
+#include <linux/jiffies.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
@@ -51,7 +52,7 @@ static inline int __vmware_platform(void)
static unsigned long vmware_get_tsc_khz(void)
{
- uint64_t tsc_hz;
+ uint64_t tsc_hz, lpj;
uint32_t eax, ebx, ecx, edx;
VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
@@ -62,6 +63,13 @@ static unsigned long vmware_get_tsc_khz(void)
printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
(unsigned long) tsc_hz / 1000,
(unsigned long) tsc_hz % 1000);
+
+ if (!preset_lpj) {
+ lpj = ((u64)tsc_hz * 1000);
+ do_div(lpj, HZ);
+ preset_lpj = lpj;
+ }
+
return tsc_hz;
}
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 045b36cada65..994828899e09 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!csize)
return 0;
- vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+ vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
if (!vaddr)
return -ENOMEM;
@@ -46,6 +46,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
} else
memcpy(buf, vaddr + offset, csize);
+ set_iounmap_nonlazy();
iounmap(vaddr);
return csize;
}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 0d6fc71bedb1..28b09af9775c 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -980,15 +980,21 @@ static int __init parse_memopt(char *p)
if (!p)
return -EINVAL;
-#ifdef CONFIG_X86_32
if (!strcmp(p, "nopentium")) {
+#ifdef CONFIG_X86_32
setup_clear_cpu_cap(X86_FEATURE_PSE);
return 0;
- }
+#else
+ printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
+ return -EINVAL;
#endif
+ }
userdef = 1;
mem_size = memparse(p, &p);
+ /* don't remove all of memory when handling "mem={invalid}" param */
+ if (mem_size == 0)
+ return -EINVAL;
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
return 0;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index e5cc7e82e60d..f67a33c7f415 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -18,7 +18,6 @@
#include <asm/apic.h>
#include <asm/iommu.h>
#include <asm/gart.h>
-#include <asm/hpet.h>
static void __init fix_hypertransport_config(int num, int slot, int func)
{
@@ -146,15 +145,10 @@ static void __init ati_bugs(int num, int slot, int func)
static u32 __init ati_sbx00_rev(int num, int slot, int func)
{
- u32 old, d;
+ u32 d;
- d = read_pci_config(num, slot, func, 0x70);
- old = d;
- d &= ~(1<<8);
- write_pci_config(num, slot, func, 0x70, d);
d = read_pci_config(num, slot, func, 0x8);
d &= 0xff;
- write_pci_config(num, slot, func, 0x70, old);
return d;
}
@@ -163,11 +157,19 @@ static void __init ati_bugs_contd(int num, int slot, int func)
{
u32 d, rev;
- if (acpi_use_timer_override)
+ rev = ati_sbx00_rev(num, slot, func);
+ if (rev >= 0x40)
+ acpi_fix_pin2_polarity = 1;
+
+ /*
+ * SB600: revisions 0x11, 0x12, 0x13, 0x14, ...
+ * SB700: revisions 0x39, 0x3a, ...
+ * SB800: revisions 0x40, 0x41, ...
+ */
+ if (rev >= 0x39)
return;
- rev = ati_sbx00_rev(num, slot, func);
- if (rev > 0x13)
+ if (acpi_use_timer_override)
return;
/* check for IRQ0 interrupt swap */
@@ -192,21 +194,6 @@ static void __init ati_bugs_contd(int num, int slot, int func)
}
#endif
-/*
- * Force the read back of the CMP register in hpet_next_event()
- * to work around the problem that the CMP register write seems to be
- * delayed. See hpet_next_event() for details.
- *
- * We do this on all SMBUS incarnations for now until we have more
- * information about the affected chipsets.
- */
-static void __init ati_hpet_bugs(int num, int slot, int func)
-{
-#ifdef CONFIG_HPET_TIMER
- hpet_readback_cmp = 1;
-#endif
-}
-
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -236,8 +223,6 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
- { PCI_VENDOR_ID_ATI, PCI_ANY_ID,
- PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs },
{}
};
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4db7c4d12ffa..2642cf9911f8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1268,7 +1268,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
decl PER_CPU_VAR(irq_count)
jmp error_exit
CFI_ENDPROC
-END(do_hypervisor_callback)
+END(xen_do_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 37c3d4b17d85..75e398199643 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -328,7 +328,7 @@ ENTRY(startup_32_smp)
/*
* Enable paging
*/
- movl $pa(swapper_pg_dir),%eax
+ movl pa(initial_page_table), %eax
movl %eax,%cr3 /* set the page table pointer.. */
movl %cr0,%eax
orl $X86_CR0_PG,%eax
@@ -608,6 +608,8 @@ ignore_int:
.align 4
ENTRY(initial_code)
.long i386_start_kernel
+ENTRY(initial_page_table)
+ .long pa(swapper_pg_dir)
/*
* BSS section
@@ -623,6 +625,10 @@ ENTRY(swapper_pg_dir)
#endif
swapper_pg_fixmap:
.fill 1024,4,0
+#ifdef CONFIG_X86_TRAMPOLINE
+ENTRY(trampoline_pg_dir)
+ .fill 1024,4,0
+#endif
ENTRY(empty_zero_page)
.fill 4096,1,0
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ba390d731175..917c66fca56d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -36,7 +36,6 @@
unsigned long hpet_address;
u8 hpet_blockid; /* OS timer block num */
u8 hpet_msi_disable;
-u8 hpet_readback_cmp;
#ifdef CONFIG_PCI_MSI
static unsigned long hpet_num_timers;
@@ -396,23 +395,27 @@ static int hpet_next_event(unsigned long delta,
* at that point and we would wait for the next hpet interrupt
* forever. We found out that reading the CMP register back
* forces the transfer so we can rely on the comparison with
- * the counter register below.
+ * the counter register below. If the read back from the
+ * compare register does not match the value we programmed
+ * then we might have a real hardware problem. We can not do
+ * much about it here, but at least alert the user/admin with
+ * a prominent warning.
*
- * That works fine on those ATI chipsets, but on newer Intel
- * chipsets (ICH9...) this triggers due to an erratum: Reading
- * the comparator immediately following a write is returning
- * the old value.
+ * An erratum on some chipsets (ICH9,..), results in
+ * comparator read immediately following a write returning old
+ * value. Workaround for this is to read this value second
+ * time, when first read returns old value.
*
- * We restrict the read back to the affected ATI chipsets (set
- * by quirks) and also run it with hpet=verbose for debugging
- * purposes.
+ * In fact the write to the comparator register is delayed up
+ * to two HPET cycles so the workaround we tried to restrict
+ * the readback to those known to be borked ATI chipsets
+ * failed miserably. So we give up on optimizations forever
+ * and penalize all HPET incarnations unconditionally.
*/
- if (hpet_readback_cmp || hpet_verbose) {
- u32 cmp = hpet_readl(HPET_Tn_CMP(timer));
-
- if (cmp != cnt)
+ if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) {
+ if (hpet_readl(HPET_Tn_CMP(timer)) != cnt)
printk_once(KERN_WARNING
- "hpet: compare register read back failed.\n");
+ "hpet: compare register read back failed.\n");
}
return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
@@ -504,7 +507,7 @@ static int hpet_assign_irq(struct hpet_dev *dev)
{
unsigned int irq;
- irq = create_irq();
+ irq = create_irq_nr(0, -1);
if (!irq)
return -EINVAL;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a8f1b803d2fd..f3654702099a 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -421,6 +421,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
dr6_p = (unsigned long *)ERR_PTR(args->err);
dr6 = *dr6_p;
+ /* If it's a single step, TRAP bits are random */
+ if (dr6 & DR_STEP)
+ return NOTIFY_DONE;
+
/* Do an early return if no trap bits are set in DR6 */
if ((dr6 & DR_TRAP_BITS) == 0)
return NOTIFY_DONE;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index e1af7c055c7d..67381a227d67 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -66,7 +66,6 @@ struct microcode_amd {
unsigned int mpb[0];
};
-#define UCODE_MAX_SIZE 2048
#define UCODE_CONTAINER_SECTION_HDR 8
#define UCODE_CONTAINER_HEADER_SIZE 12
@@ -125,6 +124,37 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
return 1;
}
+static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
+{
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ unsigned int max_size, actual_size;
+
+#define F1XH_MPB_MAX_SIZE 2048
+#define F14H_MPB_MAX_SIZE 1824
+#define F15H_MPB_MAX_SIZE 4096
+
+ switch (c->x86) {
+ case 0x14:
+ max_size = F14H_MPB_MAX_SIZE;
+ break;
+ case 0x15:
+ max_size = F15H_MPB_MAX_SIZE;
+ break;
+ default:
+ max_size = F1XH_MPB_MAX_SIZE;
+ break;
+ }
+
+ actual_size = buf[4] + (buf[5] << 8);
+
+ if (actual_size > size || actual_size > max_size) {
+ pr_err("section size mismatch\n");
+ return 0;
+ }
+
+ return actual_size;
+}
+
static int apply_microcode_amd(int cpu)
{
u32 rev, dummy;
@@ -162,11 +192,11 @@ static int get_ucode_data(void *to, const u8 *from, size_t n)
}
static void *
-get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
+get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
{
- unsigned int total_size;
+ unsigned int actual_size = 0;
u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
- void *mc;
+ void *mc = NULL;
if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR))
return NULL;
@@ -176,23 +206,18 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
return NULL;
}
- total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
-
- if (total_size > size || total_size > UCODE_MAX_SIZE) {
- pr_err("error: size mismatch\n");
+ actual_size = verify_ucode_size(cpu, buf, size);
+ if (!actual_size)
return NULL;
- }
- mc = vmalloc(UCODE_MAX_SIZE);
- if (mc) {
- memset(mc, 0, UCODE_MAX_SIZE);
- if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR,
- total_size)) {
- vfree(mc);
- mc = NULL;
- } else
- *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
- }
+ mc = vmalloc(actual_size);
+ if (!mc)
+ return NULL;
+
+ memset(mc, 0, actual_size);
+ get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, actual_size);
+ *mc_size = actual_size + UCODE_CONTAINER_SECTION_HDR;
+
return mc;
}
@@ -258,7 +283,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
unsigned int uninitialized_var(mc_size);
struct microcode_header_amd *mc_header;
- mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
+ mc = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size);
if (!mc)
break;
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 356170262a93..2573689bda77 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -364,8 +364,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
/* For performance reasons, reuse mc area when possible */
if (!mc || mc_size > curr_mc_size) {
- if (mc)
- vfree(mc);
+ vfree(mc);
mc = vmalloc(mc_size);
if (!mc)
break;
@@ -374,13 +373,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
if (get_ucode_data(mc, ucode_ptr, mc_size) ||
microcode_sanity_check(mc) < 0) {
- vfree(mc);
break;
}
if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
- if (new_mc)
- vfree(new_mc);
+ vfree(new_mc);
new_rev = mc_header.rev;
new_mc = mc;
mc = NULL; /* trigger new vmalloc */
@@ -390,12 +387,10 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
leftover -= mc_size;
}
- if (mc)
- vfree(mc);
+ vfree(mc);
if (leftover) {
- if (new_mc)
- vfree(new_mc);
+ vfree(new_mc);
state = UCODE_ERROR;
goto out;
}
@@ -405,8 +400,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
goto out;
}
- if (uci->mc)
- vfree(uci->mc);
+ vfree(uci->mc);
uci->mc = (struct microcode_intel *)new_mc;
pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index d86dbf7e54be..d7b6f7fb4fec 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -274,6 +274,18 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
+static void __init smp_register_lapic_address(unsigned long address)
+{
+ mp_lapic_addr = address;
+
+ set_fixmap_nocache(FIX_APIC_BASE, address);
+ if (boot_cpu_physical_apicid == -1U) {
+ boot_cpu_physical_apicid = read_apic_id();
+ apic_version[boot_cpu_physical_apicid] =
+ GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
+}
+
static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
{
char str[16];
@@ -295,6 +307,10 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
if (early)
return 1;
+ /* Initialize the lapic mapping */
+ if (!acpi_lapic)
+ smp_register_lapic_address(mpc->lapic);
+
if (mpc->oemptr)
x86_init.mpparse.smp_read_mpc_oem(mpc);
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 8297160c41b3..a23b38252b85 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -117,6 +117,7 @@ int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
unsigned long flags;
int ret = -EIO;
int i;
+ int restarts = 0;
spin_lock_irqsave(&ec_lock, flags);
@@ -173,7 +174,9 @@ restart:
if (wait_on_obf(0x6c, 1)) {
printk(KERN_ERR "olpc-ec: timeout waiting for"
" EC to provide data!\n");
- goto restart;
+ if (restarts++ < 10)
+ goto restart;
+ goto err;
}
outbuf[i] = inb(0x68);
printk(KERN_DEBUG "olpc-ec: received 0x%x\n",
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 0f7f130caa67..870e069863a4 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -80,6 +80,9 @@ static u32 gart_unmapped_entry;
#define AGPEXTERN
#endif
+/* GART can only remap to physical addresses < 1TB */
+#define GART_MAX_PHYS_ADDR (1ULL << 40)
+
/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;
@@ -211,9 +214,13 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
size_t size, int dir, unsigned long align_mask)
{
unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
- unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
+ unsigned long iommu_page;
int i;
+ if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR))
+ return bad_dma_addr;
+
+ iommu_page = alloc_iommu(dev, npages, align_mask);
if (iommu_page == -1) {
if (!nonforced_iommu(dev, phys_mem, size))
return phys_mem;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e7e35219b32f..553b02f13094 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -525,42 +525,6 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
return (edx & MWAIT_EDX_C1);
}
-/*
- * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e.
- * For more information see
- * - Erratum #400 for NPT family 0xf and family 0x10 CPUs
- * - Erratum #365 for family 0x11 (not affected because C1e not in use)
- */
-static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
-{
- u64 val;
- if (c->x86_vendor != X86_VENDOR_AMD)
- goto no_c1e_idle;
-
- /* Family 0x0f models < rev F do not have C1E */
- if (c->x86 == 0x0F && c->x86_model >= 0x40)
- return 1;
-
- if (c->x86 == 0x10) {
- /*
- * check OSVW bit for CPUs that are not affected
- * by erratum #400
- */
- if (cpu_has(c, X86_FEATURE_OSVW)) {
- rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val);
- if (val >= 2) {
- rdmsrl(MSR_AMD64_OSVW_STATUS, val);
- if (!(val & BIT(1)))
- goto no_c1e_idle;
- }
- }
- return 1;
- }
-
-no_c1e_idle:
- return 0;
-}
-
static cpumask_var_t c1e_mask;
static int c1e_detected;
@@ -638,7 +602,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
*/
printk(KERN_INFO "using mwait in idle threads.\n");
pm_idle = mwait_idle;
- } else if (check_c1e_idle(c)) {
+ } else if (cpu_has_amd_erratum(amd_erratum_400)) {
+ /* E400: APIC timer interrupt does not wake up CPU from C1e */
printk(KERN_INFO "using C1E aware idle routine\n");
pm_idle = c1e_idle;
} else
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af47..a3d0dc59067b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -245,7 +245,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
set_user_gs(regs, 0);
regs->fs = 0;
- set_fs(USER_DS);
regs->ds = __USER_DS;
regs->es = __USER_DS;
regs->ss = __USER_DS;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c2422a99f1f..9cb414381d44 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -337,7 +337,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
- set_fs(USER_DS);
/*
* Free the old FP and other extended state
*/
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 239427ca02af..a4f07c1cfc87 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -120,6 +120,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
static atomic64_t last_value = ATOMIC64_INIT(0);
+void pvclock_resume(void)
+{
+ atomic64_set(&last_value, 0);
+}
+
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
struct pvclock_shadow_time shadow;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e3af342fe83a..807e3c98bbbf 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -477,6 +477,22 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
},
},
+ { /* Handle problems with rebooting on the Latitude E5420. */
+ .callback = set_pci_reboot,
+ .ident = "Dell Latitude E5420",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"),
+ },
+ },
+ { /* Handle problems with rebooting on the Latitude E6420. */
+ .callback = set_pci_reboot,
+ .ident = "Dell Latitude E6420",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
+ },
+ },
{ }
};
@@ -641,7 +657,7 @@ void native_machine_shutdown(void)
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
*/
- smp_send_stop();
+ stop_other_cpus();
#endif
lapic_shutdown();
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 41235531b11c..36818f8ec2be 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -97,6 +97,8 @@ relocate_kernel:
ret
identity_mapped:
+ /* set return address to 0 if not preserving context */
+ pushl $0
/* store the start address on the stack */
pushl %edx
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 4de8f5b3d476..7a6f3b3be3cf 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -100,6 +100,8 @@ relocate_kernel:
ret
identity_mapped:
+ /* set return address to 0 if not preserving context */
+ pushq $0
/* store the start address on the stack */
pushq %rdx
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b4ae4acbd031..6600cfde6b7c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1008,6 +1008,8 @@ void __init setup_arch(char **cmdline_p)
paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+ setup_trampoline_page_table();
+
tboot_probe();
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d801210945d6..513deac7228d 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(void)
irq_exit();
}
-static void native_smp_send_stop(void)
+static void native_stop_other_cpus(int wait)
{
unsigned long flags;
- unsigned long wait;
+ unsigned long timeout;
if (reboot_force)
return;
@@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
if (num_online_cpus() > 1) {
apic->send_IPI_allbutself(REBOOT_VECTOR);
- /* Don't wait longer than a second */
- wait = USEC_PER_SEC;
- while (num_online_cpus() > 1 && wait--)
+ /*
+ * Don't wait longer than a second if the caller
+ * didn't ask us to wait.
+ */
+ timeout = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
@@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
.smp_prepare_cpus = native_smp_prepare_cpus,
.smp_cpus_done = native_smp_cpus_done,
- .smp_send_stop = native_smp_send_stop,
+ .stop_other_cpus = native_stop_other_cpus,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 11015fd1abbc..40eb0f94e8c5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -73,7 +73,6 @@
#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
-static int low_mappings;
#endif
/* State of each CPU */
@@ -91,6 +90,25 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 };
static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
+
+/*
+ * We need this for trampoline_base protection from concurrent accesses when
+ * off- and onlining cores wildly.
+ */
+static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
+
+void cpu_hotplug_driver_lock()
+{
+ mutex_lock(&x86_cpu_hotplug_driver_mutex);
+}
+
+void cpu_hotplug_driver_unlock()
+{
+ mutex_unlock(&x86_cpu_hotplug_driver_mutex);
+}
+
+ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
+ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
#else
static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
@@ -281,6 +299,18 @@ notrace static void __cpuinit start_secondary(void *unused)
* fragile that we want to limit the things done here to the
* most necessary things.
*/
+
+#ifdef CONFIG_X86_32
+ /*
+ * Switch away from the trampoline page-table
+ *
+ * Do this before cpu_init() because it needs to access per-cpu
+ * data which may not be mapped in the trampoline page-table.
+ */
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+#endif
+
vmi_bringup();
cpu_init();
preempt_disable();
@@ -299,12 +329,6 @@ notrace static void __cpuinit start_secondary(void *unused)
legacy_pic->chip->unmask(0);
}
-#ifdef CONFIG_X86_32
- while (low_mappings)
- cpu_relax();
- __flush_tlb_all();
-#endif
-
/* This must be done before setting cpu_online_mask */
set_cpu_sibling_map(raw_smp_processor_id());
wmb();
@@ -754,6 +778,7 @@ do_rest:
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
+ initial_page_table = __pa(&trampoline_pg_dir);
#else
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
@@ -901,20 +926,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-#ifdef CONFIG_X86_32
- /* init low mem mapping */
- clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
- flush_tlb_all();
- low_mappings = 1;
-
err = do_boot_cpu(apicid, cpu);
- zap_low_mappings(false);
- low_mappings = 0;
-#else
- err = do_boot_cpu(apicid, cpu);
-#endif
if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
return -EIO;
@@ -1374,11 +1387,94 @@ void play_dead_common(void)
local_irq_disable();
}
+#define MWAIT_SUBSTATE_MASK 0xf
+#define MWAIT_SUBSTATE_SIZE 4
+
+#define CPUID_MWAIT_LEAF 5
+#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
+
+/*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
+static inline void mwait_play_dead(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int highest_cstate = 0;
+ unsigned int highest_subcstate = 0;
+ int i;
+ void *mwait_ptr;
+
+ if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
+ return;
+ if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
+ return;
+ if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+ return;
+
+ eax = CPUID_MWAIT_LEAF;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+
+ /*
+ * eax will be 0 if EDX enumeration is not valid.
+ * Initialized below to cstate, sub_cstate value when EDX is valid.
+ */
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
+ eax = 0;
+ } else {
+ edx >>= MWAIT_SUBSTATE_SIZE;
+ for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+ if (edx & MWAIT_SUBSTATE_MASK) {
+ highest_cstate = i;
+ highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+ }
+ }
+ eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+ (highest_subcstate - 1);
+ }
+
+ /*
+ * This should be a memory location in a cache line which is
+ * unlikely to be touched by other processors. The actual
+ * content is immaterial as it is not actually modified in any way.
+ */
+ mwait_ptr = &current_thread_info()->flags;
+
+ wbinvd();
+
+ while (1) {
+ /*
+ * The CLFLUSH is a workaround for erratum AAI65 for
+ * the Xeon 7400 series. It's not clear it is actually
+ * needed, but it should be harmless in either case.
+ * The WBINVD is insufficient due to the spurious-wakeup
+ * case where we return around the loop.
+ */
+ clflush(mwait_ptr);
+ __monitor(mwait_ptr, 0, 0);
+ mb();
+ __mwait(eax, 0);
+ }
+}
+
+static inline void hlt_play_dead(void)
+{
+ if (current_cpu_data.x86 >= 4)
+ wbinvd();
+
+ while (1) {
+ native_halt();
+ }
+}
+
void native_play_dead(void)
{
play_dead_common();
tboot_shutdown(TB_SHUTDOWN_WFS);
- wbinvd_halt();
+
+ mwait_play_dead(); /* Only returns on failure */
+ hlt_play_dead();
}
#else /* ... !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index c652ef62742d..e2a595257390 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -1,6 +1,7 @@
#include <linux/io.h>
#include <asm/trampoline.h>
+#include <asm/pgtable.h>
#include <asm/e820.h>
#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
@@ -37,3 +38,19 @@ unsigned long __trampinit setup_trampoline(void)
memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
return virt_to_phys(trampoline_base);
}
+
+void __init setup_trampoline_page_table(void)
+{
+#ifdef CONFIG_X86_32
+ /* Copy kernel address range */
+ clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+
+ /* Initialize low mappings */
+ clone_pgd_range(trampoline_pg_dir,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ min_t(unsigned long, KERNEL_PGD_PTRS,
+ KERNEL_PGD_BOUNDARY));
+#endif
+}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 725ef4d17cd5..4d0f3ed34c41 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -568,6 +568,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
if (regs->flags & X86_VM_MASK) {
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
+ preempt_conditional_cli(regs);
return;
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 9faf91ae1841..97cdbe8b732c 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,10 +104,14 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
+static int no_sched_irq_time;
+
static int __init tsc_setup(char *str)
{
if (!strcmp(str, "reliable"))
tsc_clocksource_reliable = 1;
+ if (!strncmp(str, "noirqtime", 9))
+ no_sched_irq_time = 1;
return 1;
}
@@ -626,6 +630,44 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
local_irq_restore(flags);
}
+static unsigned long long cyc2ns_suspend;
+
+void save_sched_clock_state(void)
+{
+ if (!sched_clock_stable)
+ return;
+
+ cyc2ns_suspend = sched_clock();
+}
+
+/*
+ * Even on processors with invariant TSC, TSC gets reset in some the
+ * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to
+ * arbitrary value (still sync'd across cpu's) during resume from such sleep
+ * states. To cope up with this, recompute the cyc2ns_offset for each cpu so
+ * that sched_clock() continues from the point where it was left off during
+ * suspend.
+ */
+void restore_sched_clock_state(void)
+{
+ unsigned long long offset;
+ unsigned long flags;
+ int cpu;
+
+ if (!sched_clock_stable)
+ return;
+
+ local_irq_save(flags);
+
+ __get_cpu_var(cyc2ns_offset) = 0;
+ offset = cyc2ns_suspend - sched_clock();
+
+ for_each_possible_cpu(cpu)
+ per_cpu(cyc2ns_offset, cpu) = offset;
+
+ local_irq_restore(flags);
+}
+
#ifdef CONFIG_CPU_FREQ
/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
@@ -764,6 +806,7 @@ void mark_tsc_unstable(char *reason)
if (!tsc_unstable) {
tsc_unstable = 1;
sched_clock_stable = 0;
+ disable_sched_clock_irqtime();
printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
/* Change only the rating, when not registered */
if (clocksource_tsc.mult)
@@ -952,6 +995,9 @@ void __init tsc_init(void)
/* now allow native_sched_clock() to use rdtsc */
tsc_disabled = 0;
+ if (!no_sched_irq_time)
+ enable_sched_clock_irqtime();
+
lpj = ((u64)tsc_khz * 1000);
do_div(lpj, HZ);
lpj_fine = lpj;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5ffb5622f793..61fb98519622 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -551,8 +551,14 @@ cannot_handle:
int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
{
if (VMPI.is_vm86pus) {
- if ((trapno == 3) || (trapno == 1))
- return_to_32bit(regs, VM86_TRAP + (trapno << 8));
+ if ((trapno == 3) || (trapno == 1)) {
+ KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
+ /* setting this flag forces the code in entry_32.S to
+ call save_v86_state() and change the stack pointer
+ to KVM86->regs32 */
+ set_thread_flag(TIF_IRET);
+ return 0;
+ }
do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
return 0;
}
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 37e68fc5e24a..aa8bf4fcf725 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -305,7 +305,8 @@ void __cpuinit xsave_init(void)
*/
static void __init setup_xstate_init(void)
{
- init_xstate_buf = alloc_bootmem(xstate_size);
+ init_xstate_buf = alloc_bootmem_align(xstate_size,
+ __alignof__(struct xsave_struct));
init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
}
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5ac0bb465ed6..582c8fcad2e1 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -345,10 +345,10 @@ static u32 group_table[] = {
DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
0, 0, 0, 0,
[Group4*8] =
- ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
+ ByteOp | DstMem | SrcNone | ModRM | Lock, ByteOp | DstMem | SrcNone | ModRM | Lock,
0, 0, 0, 0, 0, 0,
[Group5*8] =
- DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
+ DstMem | SrcNone | ModRM | Lock, DstMem | SrcNone | ModRM | Lock,
SrcMem | ModRM | Stack, 0,
SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps,
SrcMem | ModRM | Stack, 0,
@@ -1712,17 +1712,16 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
struct decode_cache *c = &ctxt->decode;
- u64 old = c->dst.orig_val;
+ u64 old = c->dst.orig_val64;
if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
-
c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
ctxt->eflags &= ~EFLG_ZF;
} else {
- c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
- (u32) c->regs[VCPU_REGS_RBX];
+ c->dst.val64 = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
+ (u32) c->regs[VCPU_REGS_RBX];
ctxt->eflags |= EFLG_ZF;
}
@@ -2535,7 +2534,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
ctxt->vcpu);
if (rc != X86EMUL_CONTINUE)
goto done;
- c->src.orig_val = c->src.val;
+ c->src.orig_val64 = c->src.val64;
}
if (c->src2.type == OP_MEM) {
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 93825ff3338f..8a3324668cac 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -553,6 +553,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
s->irq_request_opaque = kvm;
s->pics[0].pics_state = s;
s->pics[1].pics_state = s;
+ s->pics[0].isr_ack = 0xff;
+ s->pics[1].isr_ack = 0xff;
/*
* Initialize PIO device
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index cd1f362f413d..4227b1aed869 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -45,7 +45,6 @@ struct kvm_kpic_state {
u8 irr; /* interrupt request register */
u8 imr; /* interrupt mask register */
u8 isr; /* interrupt service register */
- u8 isr_ack; /* interrupt ack detection */
u8 priority_add; /* highest irq priority */
u8 irq_base;
u8 read_reg_select;
@@ -58,6 +57,7 @@ struct kvm_kpic_state {
u8 init4; /* true if 4 byte init */
u8 elcr; /* PIIX edge/trigger selection */
u8 elcr_mask;
+ u8 isr_ack; /* interrupt ack detection */
struct kvm_pic *pics_state;
};
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b1ed0a1a5913..92b6ca4fb54d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -799,8 +799,12 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
ret = handler(kvm, &memslot->rmap[gfn_offset], data);
for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
- int idx = gfn_offset;
- idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
+ unsigned long idx;
+ int nr;
+
+ nr = KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL+j);
+ idx = (memslot->base_gfn+gfn_offset) / nr -
+ memslot->base_gfn / nr;
ret |= handler(kvm,
&memslot->lpage_info[j][idx].rmap_pde,
data);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 2331bdc2b549..e34452b92928 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -324,8 +324,32 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
break;
}
- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
- continue;
+ if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) {
+ struct kvm_mmu_page *child;
+ unsigned direct_access;
+
+ if (level != gw->level)
+ continue;
+
+ /*
+ * For the direct sp, if the guest pte's dirty bit
+ * changed form clean to dirty, it will corrupt the
+ * sp's access: allow writable in the read-only sp,
+ * so we should update the spte at this point to get
+ * a new sp with the correct access.
+ */
+ direct_access = gw->pt_access & gw->pte_access;
+ if (!is_dirty_gpte(gw->ptes[gw->level - 1]))
+ direct_access &= ~ACC_WRITE_MASK;
+
+ child = page_header(*sptep & PT64_BASE_ADDR_MASK);
+ if (child->role.access == direct_access)
+ continue;
+
+ mmu_page_remove_parent_pte(child, sptep);
+ __set_spte(sptep, shadow_trap_nonpresent_pte);
+ kvm_flush_remote_tlbs(vcpu->kvm);
+ }
if (is_large_pte(*sptep)) {
rmap_remove(vcpu->kvm, sptep);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ce438e0fdd26..d103e15c64f3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -87,6 +87,14 @@ struct nested_state {
/* A VMEXIT is required but not yet emulated */
bool exit_required;
+ /*
+ * If we vmexit during an instruction emulation we need this to restore
+ * the l1 guest rip after the emulation
+ */
+ unsigned long vmexit_rip;
+ unsigned long vmexit_rsp;
+ unsigned long vmexit_rax;
+
/* cache for intercepts of the guest */
u16 intercept_cr_read;
u16 intercept_cr_write;
@@ -766,7 +774,6 @@ static void init_vmcb(struct vcpu_svm *svm)
control->iopm_base_pa = iopm_base;
control->msrpm_base_pa = __pa(svm->msrpm);
- control->tsc_offset = 0;
control->int_ctl = V_INTR_MASKING_MASK;
init_seg(&save->es);
@@ -902,6 +909,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
init_vmcb(svm);
+ svm->vmcb->control.tsc_offset = 0-native_read_tsc();
fx_init(&svm->vcpu);
svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -1201,8 +1209,12 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (old == new) {
/* cr0 write with ts and mp unchanged */
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
- if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
+ if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) {
+ svm->nested.vmexit_rip = kvm_rip_read(vcpu);
+ svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
+ svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
return;
+ }
}
}
@@ -2398,6 +2410,23 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return 1;
}
+static int cr0_write_interception(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ int r;
+
+ r = emulate_instruction(&svm->vcpu, 0, 0, 0);
+
+ if (svm->nested.vmexit_rip) {
+ kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip);
+ kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax);
+ svm->nested.vmexit_rip = 0;
+ }
+
+ return r == EMULATE_DONE;
+}
+
static int cr8_write_interception(struct vcpu_svm *svm)
{
struct kvm_run *kvm_run = svm->vcpu.run;
@@ -2671,7 +2700,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR4] = emulate_on_interception,
[SVM_EXIT_READ_CR8] = emulate_on_interception,
[SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
- [SVM_EXIT_WRITE_CR0] = emulate_on_interception,
+ [SVM_EXIT_WRITE_CR0] = cr0_write_interception,
[SVM_EXIT_WRITE_CR3] = emulate_on_interception,
[SVM_EXIT_WRITE_CR4] = emulate_on_interception,
[SVM_EXIT_WRITE_CR8] = cr8_write_interception,
@@ -3067,8 +3096,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
sync_lapic_to_cr8(vcpu);
save_host_msrs(vcpu);
- fs_selector = kvm_read_fs();
- gs_selector = kvm_read_gs();
+ savesegment(fs, fs_selector);
+ savesegment(gs, gs_selector);
ldt_selector = kvm_read_ldt();
svm->vmcb->save.cr2 = vcpu->arch.cr2;
/* required for live migration with NPT */
@@ -3155,10 +3184,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
- kvm_load_fs(fs_selector);
- kvm_load_gs(gs_selector);
- kvm_load_ldt(ldt_selector);
load_host_msrs(vcpu);
+ kvm_load_ldt(ldt_selector);
+ loadsegment(fs, fs_selector);
+#ifdef CONFIG_X86_64
+ load_gs_index(gs_selector);
+ wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
+#else
+ loadsegment(gs, gs_selector);
+#endif
reload_tss(vcpu);
@@ -3253,6 +3287,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
switch (func) {
+ case 0x80000001:
+ if (nested)
+ entry->ecx |= (1 << 2); /* Set SVM bit */
+ break;
case 0x8000000A:
entry->eax = 1; /* SVM revision 1 */
entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ee03679efe78..f1abe23da5d3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -177,6 +177,7 @@ static u64 construct_eptp(unsigned long root_hpa);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
+static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
@@ -745,7 +746,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
*/
vmx->host_state.ldt_sel = kvm_read_ldt();
vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
- vmx->host_state.fs_sel = kvm_read_fs();
+ savesegment(fs, vmx->host_state.fs_sel);
if (!(vmx->host_state.fs_sel & 7)) {
vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
vmx->host_state.fs_reload_needed = 0;
@@ -753,7 +754,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
vmcs_write16(HOST_FS_SELECTOR, 0);
vmx->host_state.fs_reload_needed = 1;
}
- vmx->host_state.gs_sel = kvm_read_gs();
+ savesegment(gs, vmx->host_state.gs_sel);
if (!(vmx->host_state.gs_sel & 7))
vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
else {
@@ -770,10 +771,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
#endif
#ifdef CONFIG_X86_64
- if (is_long_mode(&vmx->vcpu)) {
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
- }
#endif
for (i = 0; i < vmx->save_nmsrs; ++i)
kvm_set_shared_msr(vmx->guest_msrs[i].index,
@@ -783,35 +783,30 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
static void __vmx_load_host_state(struct vcpu_vmx *vmx)
{
- unsigned long flags;
-
if (!vmx->host_state.loaded)
return;
++vmx->vcpu.stat.host_state_reload;
vmx->host_state.loaded = 0;
- if (vmx->host_state.fs_reload_needed)
- kvm_load_fs(vmx->host_state.fs_sel);
+#ifdef CONFIG_X86_64
+ if (is_long_mode(&vmx->vcpu))
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+#endif
if (vmx->host_state.gs_ldt_reload_needed) {
kvm_load_ldt(vmx->host_state.ldt_sel);
- /*
- * If we have to reload gs, we must take care to
- * preserve our gs base.
- */
- local_irq_save(flags);
- kvm_load_gs(vmx->host_state.gs_sel);
#ifdef CONFIG_X86_64
- wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
+ load_gs_index(vmx->host_state.gs_sel);
+#else
+ loadsegment(gs, vmx->host_state.gs_sel);
#endif
- local_irq_restore(flags);
}
+ if (vmx->host_state.fs_reload_needed)
+ loadsegment(fs, vmx->host_state.fs_sel);
reload_tss();
#ifdef CONFIG_X86_64
- if (is_long_mode(&vmx->vcpu)) {
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
- }
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
+ load_gdt(&__get_cpu_var(host_gdt));
}
static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -1314,6 +1309,8 @@ static int hardware_enable(void *garbage)
ept_sync_global();
+ store_gdt(&__get_cpu_var(host_gdt));
+
return 0;
}
@@ -2514,8 +2511,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
- vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */
- vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */
+ vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
+ vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
#ifdef CONFIG_X86_64
rdmsrl(MSR_FS_BASE, a);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7fa89c39c64f..eee5cdd29bc2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1910,9 +1910,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
0 /* Reserved, XSAVE, OSXSAVE */;
/* cpuid 0x80000001.ecx */
const u32 kvm_supported_word6_x86_features =
- F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
+ F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
- F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
+ F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
0 /* SKINIT */ | 0 /* WDT */;
/* all calls to cpuid_count() should be made on the same cpu */
@@ -2220,6 +2220,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
!kvm_exception_is_soft(vcpu->arch.exception.nr);
events->exception.nr = vcpu->arch.exception.nr;
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
+ events->exception.pad = 0;
events->exception.error_code = vcpu->arch.exception.error_code;
events->interrupt.injected =
@@ -2233,13 +2234,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending;
events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
+ events->nmi.pad = 0;
events->sipi_vector = vcpu->arch.sipi_vector;
events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SIPI_VECTOR
| KVM_VCPUEVENT_VALID_SHADOW);
-
+ memset(&events->reserved, 0, sizeof(events->reserved));
vcpu_put(vcpu);
}
@@ -2289,6 +2291,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
dbgregs->dr6 = vcpu->arch.dr6;
dbgregs->dr7 = vcpu->arch.dr7;
dbgregs->flags = 0;
+ memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
vcpu_put(vcpu);
}
@@ -2756,6 +2759,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
sizeof(ps->channels));
ps->flags = kvm->arch.vpit->pit_state.flags;
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ memset(&ps->reserved, 0, sizeof(ps->reserved));
return r;
}
@@ -2825,10 +2829,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
if (is_dirty) {
struct kvm_memslots *slots, *old_slots;
- spin_lock(&kvm->mmu_lock);
- kvm_mmu_slot_remove_write_access(kvm, log->slot);
- spin_unlock(&kvm->mmu_lock);
-
slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!slots)
goto out_free;
@@ -2841,6 +2841,11 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
synchronize_srcu_expedited(&kvm->srcu);
dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
kfree(old_slots);
+
+ spin_lock(&kvm->mmu_lock);
+ kvm_mmu_slot_remove_write_access(kvm, log->slot);
+ spin_unlock(&kvm->mmu_lock);
+
}
r = 0;
@@ -3152,6 +3157,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
now_ns = timespec_to_ns(&now);
user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
user_ns.flags = 0;
+ memset(&user_ns.pad, 0, sizeof(user_ns.pad));
r = -EFAULT;
if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
@@ -5438,6 +5444,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
int user_alloc)
{
int npages = memslot->npages;
+ int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+ /* Prevent internal slot pages from being moved by fork()/COW. */
+ if (memslot->id >= KVM_MEMORY_SLOTS)
+ map_flags = MAP_SHARED | MAP_ANONYMOUS;
/*To keep backward compatibility with older userspace,
*x86 needs to hanlde !user_alloc case.
@@ -5450,7 +5461,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
userspace_addr = do_mmap(NULL, 0,
npages * PAGE_SIZE,
PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS,
+ map_flags,
0);
up_write(&current->mm->mmap_sem);
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 4a5979aa6883..2cda60a06e65 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -25,150 +25,172 @@
CFI_ADJUST_CFA_OFFSET -4
.endm
-.macro BEGIN func reg
-$v = \reg
-
-ENTRY(atomic64_\func\()_386)
- CFI_STARTPROC
- LOCK $v
-
-.macro RETURN
- UNLOCK $v
+#define BEGIN(op) \
+.macro endp; \
+ CFI_ENDPROC; \
+ENDPROC(atomic64_##op##_386); \
+.purgem endp; \
+.endm; \
+ENTRY(atomic64_##op##_386); \
+ CFI_STARTPROC; \
+ LOCK v;
+
+#define ENDP endp
+
+#define RET \
+ UNLOCK v; \
ret
-.endm
-
-.macro END_
- CFI_ENDPROC
-ENDPROC(atomic64_\func\()_386)
-.purgem RETURN
-.purgem END_
-.purgem END
-.endm
-
-.macro END
-RETURN
-END_
-.endm
-.endm
-BEGIN read %ecx
- movl ($v), %eax
- movl 4($v), %edx
-END
-
-BEGIN set %esi
- movl %ebx, ($v)
- movl %ecx, 4($v)
-END
-
-BEGIN xchg %esi
- movl ($v), %eax
- movl 4($v), %edx
- movl %ebx, ($v)
- movl %ecx, 4($v)
-END
-
-BEGIN add %ecx
- addl %eax, ($v)
- adcl %edx, 4($v)
-END
-
-BEGIN add_return %ecx
- addl ($v), %eax
- adcl 4($v), %edx
- movl %eax, ($v)
- movl %edx, 4($v)
-END
-
-BEGIN sub %ecx
- subl %eax, ($v)
- sbbl %edx, 4($v)
-END
-
-BEGIN sub_return %ecx
+#define RET_ENDP \
+ RET; \
+ ENDP
+
+#define v %ecx
+BEGIN(read)
+ movl (v), %eax
+ movl 4(v), %edx
+RET_ENDP
+#undef v
+
+#define v %esi
+BEGIN(set)
+ movl %ebx, (v)
+ movl %ecx, 4(v)
+RET_ENDP
+#undef v
+
+#define v %esi
+BEGIN(xchg)
+ movl (v), %eax
+ movl 4(v), %edx
+ movl %ebx, (v)
+ movl %ecx, 4(v)
+RET_ENDP
+#undef v
+
+#define v %ecx
+BEGIN(add)
+ addl %eax, (v)
+ adcl %edx, 4(v)
+RET_ENDP
+#undef v
+
+#define v %ecx
+BEGIN(add_return)
+ addl (v), %eax
+ adcl 4(v), %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+RET_ENDP
+#undef v
+
+#define v %ecx
+BEGIN(sub)
+ subl %eax, (v)
+ sbbl %edx, 4(v)
+RET_ENDP
+#undef v
+
+#define v %ecx
+BEGIN(sub_return)
negl %edx
negl %eax
sbbl $0, %edx
- addl ($v), %eax
- adcl 4($v), %edx
- movl %eax, ($v)
- movl %edx, 4($v)
-END
-
-BEGIN inc %esi
- addl $1, ($v)
- adcl $0, 4($v)
-END
-
-BEGIN inc_return %esi
- movl ($v), %eax
- movl 4($v), %edx
+ addl (v), %eax
+ adcl 4(v), %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+RET_ENDP
+#undef v
+
+#define v %esi
+BEGIN(inc)
+ addl $1, (v)
+ adcl $0, 4(v)
+RET_ENDP
+#undef v
+
+#define v %esi
+BEGIN(inc_return)
+ movl (v), %eax
+ movl 4(v), %edx
addl $1, %eax
adcl $0, %edx
- movl %eax, ($v)
- movl %edx, 4($v)
-END
-
-BEGIN dec %esi
- subl $1, ($v)
- sbbl $0, 4($v)
-END
-
-BEGIN dec_return %esi
- movl ($v), %eax
- movl 4($v), %edx
+ movl %eax, (v)
+ movl %edx, 4(v)
+RET_ENDP
+#undef v
+
+#define v %esi
+BEGIN(dec)
+ subl $1, (v)
+ sbbl $0, 4(v)
+RET_ENDP
+#undef v
+
+#define v %esi
+BEGIN(dec_return)
+ movl (v), %eax
+ movl 4(v), %edx
subl $1, %eax
sbbl $0, %edx
- movl %eax, ($v)
- movl %edx, 4($v)
-END
+ movl %eax, (v)
+ movl %edx, 4(v)
+RET_ENDP
+#undef v
-BEGIN add_unless %ecx
+#define v %ecx
+BEGIN(add_unless)
addl %eax, %esi
adcl %edx, %edi
- addl ($v), %eax
- adcl 4($v), %edx
+ addl (v), %eax
+ adcl 4(v), %edx
cmpl %eax, %esi
je 3f
1:
- movl %eax, ($v)
- movl %edx, 4($v)
+ movl %eax, (v)
+ movl %edx, 4(v)
movl $1, %eax
2:
-RETURN
+ RET
3:
cmpl %edx, %edi
jne 1b
xorl %eax, %eax
jmp 2b
-END_
+ENDP
+#undef v
-BEGIN inc_not_zero %esi
- movl ($v), %eax
- movl 4($v), %edx
+#define v %esi
+BEGIN(inc_not_zero)
+ movl (v), %eax
+ movl 4(v), %edx
testl %eax, %eax
je 3f
1:
addl $1, %eax
adcl $0, %edx
- movl %eax, ($v)
- movl %edx, 4($v)
+ movl %eax, (v)
+ movl %edx, 4(v)
movl $1, %eax
2:
-RETURN
+ RET
3:
testl %edx, %edx
jne 1b
jmp 2b
-END_
+ENDP
+#undef v
-BEGIN dec_if_positive %esi
- movl ($v), %eax
- movl 4($v), %edx
+#define v %esi
+BEGIN(dec_if_positive)
+ movl (v), %eax
+ movl 4(v), %edx
subl $1, %eax
sbbl $0, %edx
js 1f
- movl %eax, ($v)
- movl %edx, 4($v)
+ movl %eax, (v)
+ movl %edx, 4(v)
1:
-END
+RET_ENDP
+#undef v
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 71100c98e337..a4899ae3975f 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -72,7 +72,7 @@ ENTRY(_copy_to_user)
addq %rdx,%rcx
jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx
- jae bad_to_user
+ ja bad_to_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
CFI_ENDPROC
ENDPROC(_copy_to_user)
@@ -85,7 +85,7 @@ ENTRY(_copy_from_user)
addq %rdx,%rcx
jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx
- jae bad_from_user
+ ja bad_from_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
CFI_ENDPROC
ENDPROC(_copy_from_user)
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 648fe4741782..f35eec78a68e 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -36,7 +36,7 @@
*/
#ifdef CONFIG_SMP
ENTRY(__write_lock_failed)
- CFI_STARTPROC simple
+ CFI_STARTPROC
FRAME
2: LOCK_PREFIX
addl $ RW_LOCK_BIAS,(%eax)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f62777940dfb..544ed251a40c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -802,8 +802,10 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
up_read(&mm->mmap_sem);
/* Kernel mode? Handle exceptions or die: */
- if (!(error_code & PF_USER))
+ if (!(error_code & PF_USER)) {
no_context(regs, error_code, address);
+ return;
+ }
/* User-space => ok to do another page fault: */
if (is_prefetch(regs, error_code, address))
@@ -829,6 +831,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
unsigned long address, unsigned int fault)
{
if (fault & VM_FAULT_OOM) {
+ /* Kernel mode? Handle exceptions or die: */
+ if (!(error_code & PF_USER)) {
+ up_read(&current->mm->mmap_sem);
+ no_context(regs, error_code, address);
+ return;
+ }
+
out_of_memory(regs, error_code, address);
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5c4ee422590e..2a72049068e7 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -160,8 +160,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
* section 8.1: in PAE mode we explicitly have to flush the
* TLB via cr3 if the top-level pgd is changed...
*/
- if (mm == current->active_mm)
- write_cr3(read_cr3());
+ flush_tlb_mm(mm);
}
#else /* !CONFIG_X86_PAE */
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index f9897f7a9ef1..9c0d0d399c30 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -420,9 +420,11 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return -1;
}
- for_each_node_mask(i, nodes_parsed)
- e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
- nodes[i].end >> PAGE_SHIFT);
+ for (i = 0; i < num_node_memblks; i++)
+ e820_register_active_regions(memblk_nodeid[i],
+ node_memblk_range[i].start >> PAGE_SHIFT,
+ node_memblk_range[i].end >> PAGE_SHIFT);
+
/* for out of order entries in SRAT */
sort_node_map();
if (!nodes_cover_memory(nodes)) {
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index b28d2f1253bb..f1575c9a2572 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -568,8 +568,13 @@ static int __init init_sysfs(void)
int error;
error = sysdev_class_register(&oprofile_sysclass);
- if (!error)
- error = sysdev_register(&device_oprofile);
+ if (error)
+ return error;
+
+ error = sysdev_register(&device_oprofile);
+ if (error)
+ sysdev_class_unregister(&oprofile_sysclass);
+
return error;
}
@@ -580,8 +585,10 @@ static void exit_sysfs(void)
}
#else
-#define init_sysfs() do { } while (0)
-#define exit_sysfs() do { } while (0)
+
+static inline int init_sysfs(void) { return 0; }
+static inline void exit_sysfs(void) { }
+
#endif /* CONFIG_PM */
static int __init p4_init(char **cpu_type)
@@ -634,6 +641,18 @@ static int __init ppro_init(char **cpu_type)
if (force_arch_perfmon && cpu_has_arch_perfmon)
return 0;
+ /*
+ * Documentation on identifying Intel processors by CPU family
+ * and model can be found in the Intel Software Developer's
+ * Manuals (SDM):
+ *
+ * http://www.intel.com/products/processor/manuals/
+ *
+ * As of May 2010 the documentation for this was in the:
+ * "Intel 64 and IA-32 Architectures Software Developer's
+ * Manual Volume 3B: System Programming Guide", "Table B-1
+ * CPUID Signature Values of DisplayFamily_DisplayModel".
+ */
switch (cpu_model) {
case 0 ... 2:
*cpu_type = "i386/ppro";
@@ -652,15 +671,19 @@ static int __init ppro_init(char **cpu_type)
case 14:
*cpu_type = "i386/core";
break;
- case 15: case 23:
+ case 0x0f:
+ case 0x16:
+ case 0x17:
+ case 0x1d:
*cpu_type = "i386/core_2";
break;
+ case 0x1a:
+ case 0x1e:
case 0x2e:
- case 26:
spec = &op_arch_perfmon_spec;
*cpu_type = "i386/core_i7";
break;
- case 28:
+ case 0x1c:
*cpu_type = "i386/atom";
break;
default:
@@ -682,6 +705,8 @@ int __init op_nmi_init(struct oprofile_operations *ops)
char *cpu_type = NULL;
int ret = 0;
+ using_nmi = 0;
+
if (!cpu_has_apic)
return -ENODEV;
@@ -761,7 +786,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
mux_init(ops);
- init_sysfs();
+ ret = init_sysfs();
+ if (ret)
+ return ret;
+
using_nmi = 1;
printk(KERN_INFO "oprofile: using NMI interrupt.\n");
return 0;
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index b67a6b5aa8d4..42623310c968 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -484,21 +484,29 @@ static int __init_ibs_nmi(void)
return 0;
}
-/* initialize the APIC for the IBS interrupts if available */
+/*
+ * check and reserve APIC extended interrupt LVT offset for IBS if
+ * available
+ *
+ * init_ibs() preforms implicitly cpu-local operations, so pin this
+ * thread to its current CPU
+ */
+
static void init_ibs(void)
{
- ibs_caps = get_ibs_caps();
+ preempt_disable();
+ ibs_caps = get_ibs_caps();
if (!ibs_caps)
- return;
+ goto out;
- if (__init_ibs_nmi()) {
+ if (__init_ibs_nmi() < 0)
ibs_caps = 0;
- return;
- }
+ else
+ printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
- printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
- (unsigned)ibs_caps);
+out:
+ preempt_enable();
}
static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 2ec04c424a62..15466c096ba5 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -34,6 +34,15 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
},
},
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=16007 */
+ /* 2006 AMD HT/VIA system with two host bridges */
+ {
+ .callback = set_use_crs,
+ .ident = "ASRock ALiveSATA2-GLAN",
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"),
+ },
+ },
{}
};
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 1290ba54b350..9c57cb1b33f1 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -113,6 +113,7 @@ static void __save_processor_state(struct saved_context *ctxt)
void save_processor_state(void)
{
__save_processor_state(&saved_context);
+ save_sched_clock_state();
}
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(save_processor_state);
@@ -229,6 +230,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
void restore_processor_state(void)
{
__restore_processor_state(&saved_context);
+ restore_sched_clock_state();
}
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(restore_processor_state);
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 6b4ffedb93c9..dd78ef687c5e 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -25,7 +25,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
export CPPFLAGS_vdso.lds += -P -C
-VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
@@ -69,7 +69,7 @@ vdso32.so-$(VDSO32-y) += sysenter
vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
-VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
# This makes sure the $(obj) subdirectory exists even though vdso32/
# is not a kbuild sub-make subdirectory.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 65d8d79b46a8..25d787c17ad8 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -927,7 +927,7 @@ static const struct pv_init_ops xen_init_ops __initdata = {
};
static const struct pv_time_ops xen_time_ops __initdata = {
- .sched_clock = xen_sched_clock,
+ .sched_clock = xen_clocksource_read,
};
static const struct pv_cpu_ops xen_cpu_ops __initdata = {
@@ -1000,10 +1000,6 @@ static void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
-#ifdef CONFIG_SMP
- smp_send_stop();
-#endif
-
if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
BUG();
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 914f04695ce5..96cdf7806972 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1142,7 +1142,7 @@ static void drop_other_mm_ref(void *info)
active_mm = percpu_read(cpu_tlbstate.active_mm);
- if (active_mm == mm)
+ if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
leave_mm(smp_processor_id());
/* If this cpu still has a stale cr3 reference, then make sure
@@ -1641,8 +1641,10 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
pte_t pte;
+#ifdef CONFIG_X86_32
if (pfn > max_pfn_mapped)
max_pfn_mapped = pfn;
+#endif
if (!pte_none(pte_page[pteidx]))
continue;
@@ -1687,6 +1689,12 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
pud_t *l3;
pmd_t *l2;
+ /* max_pfn_mapped is the last pfn mapped in the initial memory
+ * mappings. Considering that on Xen after the kernel mappings we
+ * have the mappings of some pages that don't exist in pfn space, we
+ * set max_pfn_mapped to the last real pfn mapped. */
+ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
+
/* Zap identity mapping */
init_level4_pgt[0] = __pgd(0);
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 8bff7e7c290b..1b2b73ff0a6e 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -189,10 +189,10 @@ struct multicall_space __xen_mc_entry(size_t args)
unsigned argidx = roundup(b->argidx, sizeof(u64));
BUG_ON(preemptible());
- BUG_ON(b->argidx > MC_ARGS);
+ BUG_ON(b->argidx >= MC_ARGS);
if (b->mcidx == MC_BATCH ||
- (argidx + args) > MC_ARGS) {
+ (argidx + args) >= MC_ARGS) {
mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS);
xen_mc_flush();
argidx = roundup(b->argidx, sizeof(u64));
@@ -206,7 +206,7 @@ struct multicall_space __xen_mc_entry(size_t args)
ret.args = &b->args[argidx];
b->argidx = argidx + args;
- BUG_ON(b->argidx > MC_ARGS);
+ BUG_ON(b->argidx >= MC_ARGS);
return ret;
}
@@ -216,7 +216,7 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
struct multicall_space ret = { NULL, NULL };
BUG_ON(preemptible());
- BUG_ON(b->argidx > MC_ARGS);
+ BUG_ON(b->argidx >= MC_ARGS);
if (b->mcidx == 0)
return ret;
@@ -224,14 +224,14 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
if (b->entries[b->mcidx - 1].op != op)
return ret;
- if ((b->argidx + size) > MC_ARGS)
+ if ((b->argidx + size) >= MC_ARGS)
return ret;
ret.mc = &b->entries[b->mcidx - 1];
ret.args = &b->args[b->argidx];
b->argidx += size;
- BUG_ON(b->argidx > MC_ARGS);
+ BUG_ON(b->argidx >= MC_ARGS);
return ret;
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index a29693fd3138..d2dfbf500fc8 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -398,9 +398,9 @@ static void stop_self(void *v)
BUG();
}
-static void xen_smp_send_stop(void)
+static void xen_stop_other_cpus(int wait)
{
- smp_call_function(stop_self, NULL, 0);
+ smp_call_function(stop_self, NULL, wait);
}
static void xen_smp_send_reschedule(int cpu)
@@ -468,7 +468,7 @@ static const struct smp_ops xen_smp_ops __initdata = {
.cpu_disable = xen_cpu_disable,
.play_dead = xen_play_dead,
- .smp_send_stop = xen_smp_send_stop,
+ .stop_other_cpus = xen_stop_other_cpus,
.smp_send_reschedule = xen_smp_send_reschedule,
.send_call_func_ipi = xen_smp_send_call_function_ipi,
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b3c6c59ed302..41eb583adc8d 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -155,45 +155,6 @@ static void do_stolen_accounting(void)
account_idle_ticks(ticks);
}
-/*
- * Xen sched_clock implementation. Returns the number of unstolen
- * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
- * states.
- */
-unsigned long long xen_sched_clock(void)
-{
- struct vcpu_runstate_info state;
- cycle_t now;
- u64 ret;
- s64 offset;
-
- /*
- * Ideally sched_clock should be called on a per-cpu basis
- * anyway, so preempt should already be disabled, but that's
- * not current practice at the moment.
- */
- preempt_disable();
-
- now = xen_clocksource_read();
-
- get_runstate_snapshot(&state);
-
- WARN_ON(state.state != RUNSTATE_running);
-
- offset = now - state.state_entry_time;
- if (offset < 0)
- offset = 0;
-
- ret = state.time[RUNSTATE_blocked] +
- state.time[RUNSTATE_running] +
- offset;
-
- preempt_enable();
-
- return ret;
-}
-
-
/* Get the TSC speed from Xen */
unsigned long xen_tsc_khz(void)
{
@@ -464,6 +425,8 @@ void xen_timer_resume(void)
{
int cpu;
+ pvclock_resume();
+
if (xen_clockevent != &xen_vcpuop_clockevent)
return;