summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/configs/i386_defconfig2
-rw-r--r--arch/x86/configs/x86_64_defconfig2
-rw-r--r--arch/x86/entry/entry_32.S8
-rw-r--r--arch/x86/entry/entry_64.S2
-rw-r--r--arch/x86/include/asm/acpi.h2
-rw-r--r--arch/x86/include/asm/apic.h10
-rw-r--r--arch/x86/include/asm/apm.h6
-rw-r--r--arch/x86/include/asm/atomic.h8
-rw-r--r--arch/x86/include/asm/atomic64_64.h8
-rw-r--r--arch/x86/include/asm/barrier.h18
-rw-r--r--arch/x86/include/asm/bitops.h29
-rw-r--r--arch/x86/include/asm/cpu_device_id.h27
-rw-r--r--arch/x86/include/asm/cpufeatures.h4
-rw-r--r--arch/x86/include/asm/dma.h2
-rw-r--r--arch/x86/include/asm/fpu/internal.h30
-rw-r--r--arch/x86/include/asm/insn.h15
-rw-r--r--arch/x86/include/asm/local.h8
-rw-r--r--arch/x86/include/asm/msr-index.h4
-rw-r--r--arch/x86/include/asm/nospec-branch.h5
-rw-r--r--arch/x86/include/asm/paravirt.h7
-rw-r--r--arch/x86/include/asm/paravirt_types.h9
-rw-r--r--arch/x86/include/asm/percpu.h2
-rw-r--r--arch/x86/include/asm/preempt.h2
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/proto.h2
-rw-r--r--arch/x86/include/asm/rmwcc.h4
-rw-r--r--arch/x86/include/asm/stackprotector.h7
-rw-r--r--arch/x86/include/asm/svm.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h11
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/apic/apic.c4
-rw-r--r--arch/x86/kernel/apic/io_apic.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c3
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c3
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c4
-rw-r--r--arch/x86/kernel/apm_32.c5
-rw-r--r--arch/x86/kernel/asm-offsets.c3
-rw-r--r--arch/x86/kernel/asm-offsets_32.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c26
-rw-r--r--arch/x86/kernel/cpu/bugs.c213
-rw-r--r--arch/x86/kernel/cpu/centaur.c4
-rw-r--r--arch/x86/kernel/cpu/common.c62
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c20
-rw-r--r--arch/x86/kernel/cpu/match.c7
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c53
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event.c12
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_iommu.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c5
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c2
-rw-r--r--arch/x86/kernel/cpu/proc.c4
-rw-r--r--arch/x86/kernel/crash.c3
-rw-r--r--arch/x86/kernel/fpu/signal.c22
-rw-r--r--arch/x86/kernel/fpu/xstate.c37
-rw-r--r--arch/x86/kernel/head_32.S4
-rw-r--r--arch/x86/kernel/i8259.c2
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c3
-rw-r--r--arch/x86/kernel/kprobes/core.c17
-rw-r--r--arch/x86/kernel/module.c1
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/paravirt.c7
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c2
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c1
-rw-r--r--arch/x86/kernel/process.c28
-rw-r--r--arch/x86/kernel/process.h2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/reboot.c49
-rw-r--r--arch/x86/kernel/smpboot.c8
-rw-r--r--arch/x86/kernel/time.c4
-rw-r--r--arch/x86/kernel/uprobes.c10
-rw-r--r--arch/x86/kernel/vmlinux.lds.S4
-rw-r--r--arch/x86/kvm/cpuid.c8
-rw-r--r--arch/x86/kvm/emulate.c10
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/pmu_intel.c2
-rw-r--r--arch/x86/kvm/svm.c16
-rw-r--r--arch/x86/kvm/vmx.c5
-rw-r--r--arch/x86/kvm/x86.c23
-rw-r--r--arch/x86/lib/msr-smp.c4
-rw-r--r--arch/x86/math-emu/wm_sqrt.S2
-rw-r--r--arch/x86/mm/dump_pagetables.c2
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/init_64.c6
-rw-r--r--arch/x86/mm/mmio-mod.c4
-rw-r--r--arch/x86/mm/pgtable.c2
-rw-r--r--arch/x86/net/bpf_jit_comp.c29
-rw-r--r--arch/x86/tools/chkobjdump.awk1
-rw-r--r--arch/x86/tools/relocs.c12
-rw-r--r--arch/x86/xen/enlighten.c18
-rw-r--r--arch/x86/xen/p2m.c63
-rw-r--r--arch/x86/xen/smp.c1
-rw-r--r--arch/x86/xen/spinlock.c12
-rw-r--r--arch/x86/xen/xen-asm_32.S14
-rw-r--r--arch/x86/xen/xen-ops.h3
100 files changed, 757 insertions, 405 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8b4d022ce0cb..c0045e3ad0f5 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -34,12 +34,13 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \
-DDISABLE_BRANCH_PROFILING \
-Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
- -mno-mmx -mno-sse
+ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
+REALMODE_CFLAGS += $(CLANG_FLAGS)
export REALMODE_CFLAGS
# BITS is used as extension for files which are available in a 32 bit
@@ -61,6 +62,9 @@ endif
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
+# Intel CET isn't enabled in the kernel
+KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
+
ifeq ($(CONFIG_X86_32),y)
BITS := 32
UTS_MACHINE := i386
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index bf0c7b6b00c3..01eafd8aeec6 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -31,6 +31,8 @@ KBUILD_CFLAGS += -mno-mmx -mno-sse
KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+# Disable relocation relaxation in case the link is not PIE.
+KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 028be48c8839..a628b2474e6d 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -218,7 +218,6 @@ CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
# CONFIG_LCD_CLASS_DEVICE is not set
-CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
@@ -248,6 +247,7 @@ CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_TT_NEWSCHED=y
CONFIG_USB_OHCI_HCD=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index cb5b3ab5beec..649f7d604b12 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -212,7 +212,6 @@ CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
# CONFIG_LCD_CLASS_DEVICE is not set
-CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
@@ -242,6 +241,7 @@ CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_TT_NEWSCHED=y
CONFIG_USB_OHCI_HCD=y
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 575c9afeba9b..217b60246cbb 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -331,7 +331,8 @@ sysenter_past_esp:
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
*/
- ENABLE_INTERRUPTS_SYSEXIT
+ sti
+ sysexit
.pushsection .fixup, "ax"
2: movl $0, PT_FS(%esp)
@@ -554,11 +555,6 @@ ENTRY(native_iret)
iret
_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
-
-ENTRY(native_irq_enable_sysexit)
- sti
- sysexit
-END(native_irq_enable_sysexit)
#endif
ENTRY(overflow)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 6b505d91cac5..eec6defbe733 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -57,7 +57,7 @@ ENDPROC(native_usergs_sysret64)
.macro TRACE_IRQS_IRETQ
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9, EFLAGS(%rsp) /* interrupts off? */
+ btl $9, EFLAGS(%rsp) /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 94c18ebfd68c..fd51f638e4ab 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -92,7 +92,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
if (boot_cpu_data.x86 == 0x0F &&
boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86_model <= 0x05 &&
- boot_cpu_data.x86_mask < 0x0A)
+ boot_cpu_data.x86_stepping < 0x0A)
return 1;
else if (amd_e400_c1e_detected)
return 1;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3328a37ddc75..34f11bc42d9b 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -168,16 +168,6 @@ static inline void disable_local_APIC(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
-/*
- * Make previous memory operations globally visible before
- * sending the IPI through x2apic wrmsr. We need a serializing instruction or
- * mfence for this.
- */
-static inline void x2apic_wrmsr_fence(void)
-{
- asm volatile("mfence" : : : "memory");
-}
-
static inline void native_apic_msr_write(u32 reg, u32 v)
{
if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 3d1ec41ae09a..20370c6db74b 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -6,8 +6,6 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
-#include <asm/nospec-branch.h>
-
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
@@ -33,7 +31,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
- firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -46,7 +43,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S" (*esi)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
- firmware_restrict_branch_speculation_end();
}
static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -59,7 +55,6 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
- firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -72,7 +67,6 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S" (si)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
- firmware_restrict_branch_speculation_end();
return error;
}
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index afc2387323c9..47cb64dd319a 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -77,7 +77,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v)
*/
static __always_inline int atomic_sub_and_test(int i, atomic_t *v)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e");
+ GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
}
/**
@@ -114,7 +114,7 @@ static __always_inline void atomic_dec(atomic_t *v)
*/
static __always_inline int atomic_dec_and_test(atomic_t *v)
{
- GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
+ GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
}
/**
@@ -127,7 +127,7 @@ static __always_inline int atomic_dec_and_test(atomic_t *v)
*/
static __always_inline int atomic_inc_and_test(atomic_t *v)
{
- GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
+ GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
}
/**
@@ -141,7 +141,7 @@ static __always_inline int atomic_inc_and_test(atomic_t *v)
*/
static __always_inline int atomic_add_negative(int i, atomic_t *v)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s");
+ GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
}
/**
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 377fa50cc271..fbb9a82599ab 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -72,7 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
*/
static inline int atomic64_sub_and_test(long i, atomic64_t *v)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e");
+ GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
}
/**
@@ -111,7 +111,7 @@ static __always_inline void atomic64_dec(atomic64_t *v)
*/
static inline int atomic64_dec_and_test(atomic64_t *v)
{
- GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e");
+ GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
}
/**
@@ -124,7 +124,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v)
*/
static inline int atomic64_inc_and_test(atomic64_t *v)
{
- GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e");
+ GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
}
/**
@@ -138,7 +138,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v)
*/
static inline int atomic64_add_negative(long i, atomic64_t *v)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s");
+ GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
}
/**
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index b2a5bef74282..134d7ffc662e 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -119,4 +119,22 @@ do { \
#define smp_mb__before_atomic() do { } while (0)
#define smp_mb__after_atomic() do { } while (0)
+/*
+ * Make previous memory operations globally visible before
+ * a WRMSR.
+ *
+ * MFENCE makes writes visible, but only affects load/store
+ * instructions. WRMSR is unfortunately not a load/store
+ * instruction and is unaffected by MFENCE. The LFENCE ensures
+ * that the WRMSR is not reordered.
+ *
+ * Most WRMSRs are full serializing instructions themselves and
+ * do not require this barrier. This is only required for the
+ * IA32_TSC_DEADLINE and X2APIC MSRs.
+ */
+static inline void weak_wrmsr_fence(void)
+{
+ asm volatile("mfence; lfence" : : : "memory");
+}
+
#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index cfe3b954d5e4..390e323a4de9 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -77,7 +77,7 @@ set_bit(long nr, volatile unsigned long *addr)
: "iq" ((u8)CONST_MASK(nr))
: "memory");
} else {
- asm volatile(LOCK_PREFIX "bts %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
}
}
@@ -93,7 +93,7 @@ set_bit(long nr, volatile unsigned long *addr)
*/
static inline void __set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
+ asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
}
/**
@@ -114,7 +114,7 @@ clear_bit(long nr, volatile unsigned long *addr)
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)~CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX "btr %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
@@ -136,7 +136,7 @@ static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
static inline void __clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
}
/*
@@ -168,7 +168,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
*/
static inline void __change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
}
/**
@@ -187,7 +187,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX "btc %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
@@ -203,7 +203,8 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
*/
static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts),
+ *addr, "Ir", nr, "%0", c);
}
/**
@@ -232,7 +233,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
- asm("bts %2,%1\n\t"
+ asm(__ASM_SIZE(bts) " %2,%1\n\t"
"sbb %0,%0"
: "=r" (oldbit), ADDR
: "Ir" (nr));
@@ -249,7 +250,8 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
*/
static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr),
+ *addr, "Ir", nr, "%0", c);
}
/**
@@ -272,7 +274,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
- asm volatile("btr %2,%1\n\t"
+ asm volatile(__ASM_SIZE(btr) " %2,%1\n\t"
"sbb %0,%0"
: "=r" (oldbit), ADDR
: "Ir" (nr));
@@ -284,7 +286,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
- asm volatile("btc %2,%1\n\t"
+ asm volatile(__ASM_SIZE(btc) " %2,%1\n\t"
"sbb %0,%0"
: "=r" (oldbit), ADDR
: "Ir" (nr) : "memory");
@@ -302,7 +304,8 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
*/
static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc),
+ *addr, "Ir", nr, "%0", c);
}
static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
@@ -315,7 +318,7 @@ static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
int oldbit;
- asm volatile("bt %2,%1\n\t"
+ asm volatile(__ASM_SIZE(bt) " %2,%1\n\t"
"sbb %0,%0"
: "=r" (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index ff501e511d91..b9473858c6b6 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -8,6 +8,33 @@
#include <linux/mod_devicetable.h>
+#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
+
+/**
+ * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching
+ * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
+ * The name is expanded to X86_VENDOR_@_vendor
+ * @_family: The family number or X86_FAMILY_ANY
+ * @_model: The model number, model constant or X86_MODEL_ANY
+ * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY
+ * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
+ * @_data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is casted to unsigned long internally.
+ *
+ * Backport version to keep the SRBDS pile consistant. No shorter variants
+ * required for this.
+ */
+#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \
+ _steppings, _feature, _data) { \
+ .vendor = X86_VENDOR_##_vendor, \
+ .family = _family, \
+ .model = _model, \
+ .steppings = _steppings, \
+ .feature = _feature, \
+ .driver_data = (unsigned long) _data \
+}
+
extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
#endif
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 01ec126c5969..7c7fc5006017 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -270,6 +270,7 @@
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
@@ -308,10 +309,10 @@
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
-
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
@@ -343,5 +344,6 @@
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index fe884e18fa6e..c7854a098b6b 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -73,7 +73,7 @@
#define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT)
/* 4GB broken PCI/AGP hardware bus master zone */
-#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
+#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT))
#ifdef CONFIG_X86_32
/* The maximum address that we can perform a DMA transfer to on this platform */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 66a5e60f60c4..4fb38927128c 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -217,6 +217,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
}
}
+static inline void fxsave(struct fxregs_state *fx)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx));
+ else
+ asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx));
+}
+
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
@@ -290,28 +298,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
*/
-static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
-{
- u64 mask = -1;
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (static_cpu_has(X86_FEATURE_XSAVES))
- XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
- else
- XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
-
- /* We should never fault when copying to a kernel buffer: */
- WARN_ON_FPU(err);
-}
-
-/*
- * This function is called only during boot time when x86 caps are not set
- * up and alternative can not be used yet.
- */
static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
{
u64 mask = -1;
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 5a51fcbbe563..6db02d52cdf4 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -198,6 +198,21 @@ static inline int insn_offset_immediate(struct insn *insn)
return insn_offset_displacement(insn) + insn->displacement.nbytes;
}
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx: Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * and the index is stored in @idx (note that this @idx is just for a cursor,
+ * do not change it.)
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, idx, prefix) \
+ for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
#define POP_SS_OPCODE 0x1f
#define MOV_SREG_OPCODE 0x8e
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 4ad6560847b1..53238f0da79e 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -52,7 +52,7 @@ static inline void local_sub(long i, local_t *l)
*/
static inline int local_sub_and_test(long i, local_t *l)
{
- GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e");
+ GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e);
}
/**
@@ -65,7 +65,7 @@ static inline int local_sub_and_test(long i, local_t *l)
*/
static inline int local_dec_and_test(local_t *l)
{
- GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e");
+ GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e);
}
/**
@@ -78,7 +78,7 @@ static inline int local_dec_and_test(local_t *l)
*/
static inline int local_inc_and_test(local_t *l)
{
- GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e");
+ GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e);
}
/**
@@ -92,7 +92,7 @@ static inline int local_inc_and_test(local_t *l)
*/
static inline int local_add_negative(long i, local_t *l)
{
- GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s");
+ GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s);
}
/**
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 854a20efa771..4ee65ec8c29c 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -91,6 +91,10 @@
#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
+/* SRBDS support */
+#define MSR_IA32_MCU_OPT_CTRL 0x00000123
+#define RNGDS_MITG_DIS BIT(0)
+
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 783f0711895b..2f84887e8934 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -178,6 +178,7 @@ enum spectre_v2_mitigation {
enum spectre_v2_user_mitigation {
SPECTRE_V2_USER_NONE,
SPECTRE_V2_USER_STRICT,
+ SPECTRE_V2_USER_STRICT_PREFERRED,
SPECTRE_V2_USER_PRCTL,
SPECTRE_V2_USER_SECCOMP,
};
@@ -274,7 +275,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
* combination with microcode which triggers a CPU buffer flush when the
* instruction is executed.
*/
-static inline void mds_clear_cpu_buffers(void)
+static __always_inline void mds_clear_cpu_buffers(void)
{
static const u16 ds = __KERNEL_DS;
@@ -295,7 +296,7 @@ static inline void mds_clear_cpu_buffers(void)
*
* Clear CPU buffers if the corresponding static key is enabled
*/
-static inline void mds_user_clear_cpu_buffers(void)
+static __always_inline void mds_user_clear_cpu_buffers(void)
{
if (static_branch_likely(&mds_user_clear))
mds_clear_cpu_buffers();
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c759b3cca663..b4c5099cafee 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -938,13 +938,6 @@ extern void default_banner(void);
push %ecx; push %edx; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
-
-#define ENABLE_INTERRUPTS_SYSEXIT \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
- CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
-
-
#else /* !CONFIG_X86_32 */
/*
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 3d44191185f8..cc0e5a666c9e 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -162,15 +162,6 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter);
-#ifdef CONFIG_X86_32
- /*
- * Atomically enable interrupts and return to userspace. This
- * is only used in 32-bit kernels. 64-bit kernels use
- * usergs_sysret32 instead.
- */
- void (*irq_enable_sysexit)(void);
-#endif
-
/*
* Switch to usermode gs and return to 64-bit usermode using
* sysret. Only used in 64-bit kernels to return to 64-bit
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index f5e780bfa2b3..66cd0c862a80 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -534,7 +534,7 @@ static inline int x86_this_cpu_variable_test_bit(int nr,
{
int oldbit;
- asm volatile("bt "__percpu_arg(2)",%1\n\t"
+ asm volatile("btl "__percpu_arg(2)",%1\n\t"
"sbb %0,%0"
: "=r" (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 01bcde84d3e4..ad6661ca315d 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -81,7 +81,7 @@ static __always_inline void __preempt_count_sub(int val)
*/
static __always_inline bool __preempt_count_dec_and_test(void)
{
- GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
+ GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e);
}
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 048942d53988..55163bc810db 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -88,7 +88,7 @@ struct cpuinfo_x86 {
__u8 x86; /* CPU family */
__u8 x86_vendor; /* CPU vendor */
__u8 x86_model;
- __u8 x86_mask;
+ __u8 x86_stepping;
#ifdef CONFIG_X86_32
char wp_works_ok; /* It doesn't on 386's */
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index a4a77286cb1d..ae6f1592530b 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -3,6 +3,8 @@
#include <asm/ldt.h>
+struct task_struct;
+
/* misc architecture specific prototypes */
void syscall_init(void);
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 8f7866a5b9a4..cb0dce0273c8 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -5,7 +5,7 @@
#define __GEN_RMWcc(fullop, var, cc, ...) \
do { \
- asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \
+ asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
: : "m" (var), ## __VA_ARGS__ \
: "memory" : cc_label); \
return 0; \
@@ -24,7 +24,7 @@ cc_label: \
#define __GEN_RMWcc(fullop, var, cc, ...) \
do { \
char c; \
- asm volatile (fullop "; set" cc " %1" \
+ asm volatile (fullop "; set" #cc " %1" \
: "+m" (var), "=qm" (c) \
: __VA_ARGS__ : "memory"); \
return c != 0; \
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 58505f01962f..743bd2d77e51 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -54,8 +54,13 @@
/*
* Initialize the stackprotector canary value.
*
- * NOTE: this must only be called from functions that never return,
+ * NOTE: this must only be called from functions that never return
* and it must always be inlined.
+ *
+ * In addition, it should be called from a compilation unit for which
+ * stack protector is disabled. Alternatively, the caller should not end
+ * with a function call which gets tail-call optimized as that would
+ * lead to checking a modified canary value.
*/
static __always_inline void boot_init_stack_canary(void)
{
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 6136d99f537b..c1adb2ed6d41 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -108,6 +108,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_IGN_TPR_SHIFT 20
#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK)
+
#define V_INTR_MASKING_SHIFT 24
#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 8dab88b85785..33a594f728de 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -245,12 +245,15 @@ static inline void __native_flush_tlb_single(unsigned long addr)
* ASID. But, userspace flushes are probably much more
* important performance-wise.
*
- * Make sure to do only a single invpcid when KAISER is
- * disabled and we have only a single ASID.
+ * In the KAISER disabled case, do an INVLPG to make sure
+ * the mapping is flushed in case it is a global one.
*/
- if (kaiser_enabled)
+ if (kaiser_enabled) {
invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
- invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+ } else {
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ }
}
static inline void __flush_tlb_all(void)
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index c986d0b3bc35..df9ee8d768bf 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -105,7 +105,7 @@ int amd_cache_northbridges(void)
if (boot_cpu_data.x86 == 0x10 &&
boot_cpu_data.x86_model >= 0x8 &&
(boot_cpu_data.x86_model > 0x9 ||
- boot_cpu_data.x86_mask >= 0x1))
+ boot_cpu_data.x86_stepping >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
if (boot_cpu_data.x86 == 0x15)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4dcf71c26d64..f53849f3f7fb 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -41,6 +41,7 @@
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <linux/atomic.h>
+#include <asm/barrier.h>
#include <asm/mpspec.h>
#include <asm/i8259.h>
#include <asm/proto.h>
@@ -464,6 +465,9 @@ static int lapic_next_deadline(unsigned long delta,
{
u64 tsc;
+ /* This MSR is special and need a special fence: */
+ weak_wrmsr_fence();
+
tsc = rdtsc();
wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
return 0;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5e8fc9809da3..497ad354e123 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1040,6 +1040,16 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
irq = mp_irqs[idx].srcbusirq;
legacy = mp_is_legacy_irq(irq);
+ /*
+ * IRQ2 is unusable for historical reasons on systems which
+ * have a legacy PIC. See the comment vs. IRQ2 further down.
+ *
+ * If this gets removed at some point then the related code
+ * in lapic_assign_system_vectors() needs to be adjusted as
+ * well.
+ */
+ if (legacy && irq == PIC_CASCADE_IR)
+ return -EINVAL;
}
mutex_lock(&ioapic_mutex);
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cc8311c4d298..f474756fc151 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -32,7 +32,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
unsigned long flags;
u32 dest;
- x2apic_wrmsr_fence();
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
local_irq_save(flags);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 662e9150ea6f..ad7c3544b07f 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -43,7 +43,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
unsigned long this_cpu;
unsigned long flags;
- x2apic_wrmsr_fence();
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
local_irq_save(flags);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 4a139465f1d4..7554075414d4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -648,9 +648,9 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
l = li;
}
addr1 = (base << shift) +
- f * (unsigned long)(1 << m_io);
+ f * (1ULL << m_io);
addr2 = (base << shift) +
- (l + 1) * (unsigned long)(1 << m_io);
+ (l + 1) * (1ULL << m_io);
pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
id, fi, li, lnasid, addr1, addr2);
if (max_io < l)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 052c9c3026cc..dfdbe01ef9f2 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -239,6 +239,7 @@
#include <asm/olpc.h>
#include <asm/paravirt.h>
#include <asm/reboot.h>
+#include <asm/nospec-branch.h>
#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
extern int (*console_blank_hook)(int);
@@ -613,11 +614,13 @@ static long __apm_bios_call(void *_call)
gdt[0x40 / 8] = bad_bios_desc;
apm_irq_save(flags);
+ firmware_restrict_branch_speculation_start();
APM_DO_SAVE_SEGS;
apm_bios_call_asm(call->func, call->ebx, call->ecx,
&call->eax, &call->ebx, &call->ecx, &call->edx,
&call->esi);
APM_DO_RESTORE_SEGS;
+ firmware_restrict_branch_speculation_end();
apm_irq_restore(flags);
gdt[0x40 / 8] = save_desc_40;
put_cpu();
@@ -689,10 +692,12 @@ static long __apm_bios_call_simple(void *_call)
gdt[0x40 / 8] = bad_bios_desc;
apm_irq_save(flags);
+ firmware_restrict_branch_speculation_start();
APM_DO_SAVE_SEGS;
error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx,
&call->eax);
APM_DO_RESTORE_SEGS;
+ firmware_restrict_branch_speculation_end();
apm_irq_restore(flags);
gdt[0x40 / 8] = save_desc_40;
put_cpu();
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 439df975bc7a..84a7524b202c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -65,9 +65,6 @@ void common(void) {
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-#ifdef CONFIG_X86_32
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
-#endif
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
#endif
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6ce39025f467..f876141e380c 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -20,7 +20,7 @@ void foo(void)
OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
- OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
+ OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 424d8a636615..b8fbe983277b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -112,7 +112,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
- if (c->x86_model == 6 && c->x86_mask == 1) {
+ if (c->x86_model == 6 && c->x86_stepping == 1) {
const int K6_BUG_LOOP = 1000000;
int n;
void (*f_vide)(void);
@@ -142,7 +142,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
/* K6 with old style WHCR */
if (c->x86_model < 8 ||
- (c->x86_model == 8 && c->x86_mask < 8)) {
+ (c->x86_model == 8 && c->x86_stepping < 8)) {
/* We can only write allocate on the low 508Mb */
if (mbytes > 508)
mbytes = 508;
@@ -161,7 +161,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
return;
}
- if ((c->x86_model == 8 && c->x86_mask > 7) ||
+ if ((c->x86_model == 8 && c->x86_stepping > 7) ||
c->x86_model == 9 || c->x86_model == 13) {
/* The more serious chips .. */
@@ -214,7 +214,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
* As per AMD technical note 27212 0.2
*/
- if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
+ if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
printk(KERN_INFO
@@ -235,12 +235,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* but they are not certified as MP capable.
*/
/* Athlon 660/661 is valid. */
- if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
- (c->x86_mask == 1)))
+ if ((c->x86_model == 6) && ((c->x86_stepping == 0) ||
+ (c->x86_stepping == 1)))
return;
/* Duron 670 is valid */
- if ((c->x86_model == 7) && (c->x86_mask == 0))
+ if ((c->x86_model == 7) && (c->x86_stepping == 0))
return;
/*
@@ -250,8 +250,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
* See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
* more.
*/
- if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
- ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+ if (((c->x86_model == 6) && (c->x86_stepping >= 2)) ||
+ ((c->x86_model == 7) && (c->x86_stepping >= 1)) ||
(c->x86_model > 7))
if (cpu_has(c, X86_FEATURE_MP))
return;
@@ -563,7 +563,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
/* Set MTRR capability flag if appropriate */
if (c->x86 == 5)
if (c->x86_model == 13 || c->x86_model == 9 ||
- (c->x86_model == 8 && c->x86_mask >= 8))
+ (c->x86_model == 8 && c->x86_stepping >= 8))
set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
@@ -902,11 +902,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
/* Duron Rev A0 */
- if (c->x86_model == 3 && c->x86_mask == 0)
+ if (c->x86_model == 3 && c->x86_stepping == 0)
size = 64;
/* Tbird rev A1/A2 */
if (c->x86_model == 4 &&
- (c->x86_mask == 0 || c->x86_mask == 1))
+ (c->x86_stepping == 0 || c->x86_stepping == 1))
size = 256;
}
return size;
@@ -1043,7 +1043,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
}
/* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 4) | cpu->x86_mask;
+ ms = (cpu->x86_model << 4) | cpu->x86_stepping;
while ((range = *erratum++))
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
(ms >= AMD_MODEL_RANGE_START(range)) &&
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 950e6bb21955..ffc3bc8111b8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -39,6 +39,7 @@ static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
static void __init mds_print_mitigation(void);
static void __init taa_select_mitigation(void);
+static void __init srbds_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
@@ -58,7 +59,7 @@ static u64 x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
u64 x86_amd_ls_cfg_base;
u64 x86_amd_ls_cfg_ssbd_mask;
-/* Control conditional STIPB in switch_to() */
+/* Control conditional STIBP in switch_to() */
DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
/* Control conditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
@@ -99,6 +100,7 @@ void __init check_bugs(void)
l1tf_select_mitigation();
mds_select_mitigation();
taa_select_mitigation();
+ srbds_select_mitigation();
/*
* As MDS and TAA mitigations are inter-related, print MDS
@@ -369,6 +371,97 @@ static int __init tsx_async_abort_parse_cmdline(char *str)
early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "SRBDS: " fmt
+
+enum srbds_mitigations {
+ SRBDS_MITIGATION_OFF,
+ SRBDS_MITIGATION_UCODE_NEEDED,
+ SRBDS_MITIGATION_FULL,
+ SRBDS_MITIGATION_TSX_OFF,
+ SRBDS_MITIGATION_HYPERVISOR,
+};
+
+static enum srbds_mitigations srbds_mitigation = SRBDS_MITIGATION_FULL;
+
+static const char * const srbds_strings[] = {
+ [SRBDS_MITIGATION_OFF] = "Vulnerable",
+ [SRBDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
+ [SRBDS_MITIGATION_FULL] = "Mitigation: Microcode",
+ [SRBDS_MITIGATION_TSX_OFF] = "Mitigation: TSX disabled",
+ [SRBDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status",
+};
+
+static bool srbds_off;
+
+void update_srbds_msr(void)
+{
+ u64 mcu_ctrl;
+
+ if (!boot_cpu_has_bug(X86_BUG_SRBDS))
+ return;
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return;
+
+ if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED)
+ return;
+
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+
+ switch (srbds_mitigation) {
+ case SRBDS_MITIGATION_OFF:
+ case SRBDS_MITIGATION_TSX_OFF:
+ mcu_ctrl |= RNGDS_MITG_DIS;
+ break;
+ case SRBDS_MITIGATION_FULL:
+ mcu_ctrl &= ~RNGDS_MITG_DIS;
+ break;
+ default:
+ break;
+ }
+
+ wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+}
+
+static void __init srbds_select_mitigation(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has_bug(X86_BUG_SRBDS))
+ return;
+
+ /*
+ * Check to see if this is one of the MDS_NO systems supporting
+ * TSX that are only exposed to SRBDS when TSX is enabled.
+ */
+ ia32_cap = x86_read_arch_cap_msr();
+ if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM))
+ srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
+ else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR;
+ else if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL))
+ srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED;
+ else if (cpu_mitigations_off() || srbds_off)
+ srbds_mitigation = SRBDS_MITIGATION_OFF;
+
+ update_srbds_msr();
+ pr_info("%s\n", srbds_strings[srbds_mitigation]);
+}
+
+static int __init srbds_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!boot_cpu_has_bug(X86_BUG_SRBDS))
+ return 0;
+
+ srbds_off = !strcmp(str, "off");
+ return 0;
+}
+early_param("srbds", srbds_parse_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V1 : " fmt
enum spectre_v1_mitigation {
@@ -465,7 +558,8 @@ early_param("nospectre_v1", nospectre_v1_cmdline);
static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
-static enum spectre_v2_user_mitigation spectre_v2_user = SPECTRE_V2_USER_NONE;
+static enum spectre_v2_user_mitigation spectre_v2_user_stibp = SPECTRE_V2_USER_NONE;
+static enum spectre_v2_user_mitigation spectre_v2_user_ibpb = SPECTRE_V2_USER_NONE;
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@@ -516,10 +610,11 @@ enum spectre_v2_user_cmd {
};
static const char * const spectre_v2_user_strings[] = {
- [SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
- [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
- [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
- [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
+ [SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
+ [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
+ [SPECTRE_V2_USER_STRICT_PREFERRED] = "User space: Mitigation: STIBP always-on protection",
+ [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
+ [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
};
static const struct {
@@ -612,11 +707,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+ spectre_v2_user_ibpb = mode;
switch (cmd) {
case SPECTRE_V2_USER_CMD_FORCE:
case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
static_branch_enable(&switch_mm_always_ibpb);
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT;
break;
case SPECTRE_V2_USER_CMD_PRCTL:
case SPECTRE_V2_USER_CMD_AUTO:
@@ -632,21 +729,32 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
"always-on" : "conditional");
}
- /* If enhanced IBRS is enabled no STIPB required */
- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ /*
+ * If enhanced IBRS is enabled or SMT impossible, STIBP is not
+ * required.
+ */
+ if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
/*
- * If SMT is not possible or STIBP is not available clear the STIPB
- * mode.
+ * At this point, an STIBP mode other than "off" has been set.
+ * If STIBP support is not being forced, check if STIBP always-on
+ * is preferred.
*/
- if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
+ if (mode != SPECTRE_V2_USER_STRICT &&
+ boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+
+ /*
+ * If STIBP is not available, clear the STIBP mode.
+ */
+ if (!boot_cpu_has(X86_FEATURE_STIBP))
mode = SPECTRE_V2_USER_NONE;
+
+ spectre_v2_user_stibp = mode;
+
set_mode:
- spectre_v2_user = mode;
- /* Only print the STIBP mode when SMT possible */
- if (smt_possible)
- pr_info("%s\n", spectre_v2_user_strings[mode]);
+ pr_info("%s\n", spectre_v2_user_strings[mode]);
}
static const char * const spectre_v2_strings[] = {
@@ -886,10 +994,11 @@ void arch_smt_update(void)
{
mutex_lock(&spec_ctrl_mutex);
- switch (spectre_v2_user) {
+ switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
break;
case SPECTRE_V2_USER_STRICT:
+ case SPECTRE_V2_USER_STRICT_PREFERRED:
update_stibp_strict();
break;
case SPECTRE_V2_USER_PRCTL:
@@ -1114,18 +1223,41 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
return 0;
}
+static bool is_spec_ib_user_controlled(void)
+{
+ return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP;
+}
+
static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
{
switch (ctrl) {
case PR_SPEC_ENABLE:
- if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return 0;
+
/*
- * Indirect branch speculation is always disabled in strict
- * mode.
+ * With strict mode for both IBPB and STIBP, the instruction
+ * code paths avoid checking this task flag and instead,
+ * unconditionally run the instruction. However, STIBP and IBPB
+ * are independent and either can be set to conditionally
+ * enabled regardless of the mode of the other.
+ *
+ * If either is set to conditional, allow the task flag to be
+ * updated, unless it was force-disabled by a previous prctl
+ * call. Currently, this is possible on an AMD CPU which has the
+ * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the
+ * kernel is booted with 'spectre_v2_user=seccomp', then
+ * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and
+ * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED.
*/
- if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+ if (!is_spec_ib_user_controlled() ||
+ task_spec_ib_force_disable(task))
return -EPERM;
+
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
break;
@@ -1135,10 +1267,13 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
* Indirect branch speculation is always allowed when
* mitigation is force disabled.
*/
- if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return -EPERM;
- if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+
+ if (!is_spec_ib_user_controlled())
return 0;
+
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
task_set_spec_ib_force_disable(task);
@@ -1168,7 +1303,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
{
if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
- if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP)
ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif
@@ -1197,21 +1333,21 @@ static int ib_prctl_get(struct task_struct *task)
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return PR_SPEC_NOT_AFFECTED;
- switch (spectre_v2_user) {
- case SPECTRE_V2_USER_NONE:
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return PR_SPEC_ENABLE;
- case SPECTRE_V2_USER_PRCTL:
- case SPECTRE_V2_USER_SECCOMP:
+ else if (is_spec_ib_user_controlled()) {
if (task_spec_ib_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ib_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
- case SPECTRE_V2_USER_STRICT:
+ } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
return PR_SPEC_DISABLE;
- default:
+ else
return PR_SPEC_NOT_AFFECTED;
- }
}
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
@@ -1352,11 +1488,13 @@ static char *stibp_state(void)
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return "";
- switch (spectre_v2_user) {
+ switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
return ", STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
return ", STIBP: forced";
+ case SPECTRE_V2_USER_STRICT_PREFERRED:
+ return ", STIBP: always-on";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
@@ -1377,6 +1515,11 @@ static char *ibpb_state(void)
return "";
}
+static ssize_t srbds_show_state(char *buf)
+{
+ return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -1418,6 +1561,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_ITLB_MULTIHIT:
return itlb_multihit_show_state(buf);
+ case X86_BUG_SRBDS:
+ return srbds_show_state(buf);
+
default:
break;
}
@@ -1464,4 +1610,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr
{
return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
}
+
+ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
+}
#endif
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 6608c03c2126..cf761e640797 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -134,7 +134,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
clear_cpu_cap(c, X86_FEATURE_TSC);
break;
case 8:
- switch (c->x86_mask) {
+ switch (c->x86_stepping) {
default:
name = "2";
break;
@@ -209,7 +209,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
* - Note, it seems this may only be in engineering samples.
*/
if ((c->x86 == 6) && (c->x86_model == 9) &&
- (c->x86_mask == 1) && (size == 65))
+ (c->x86_stepping == 1) && (size == 65))
size -= 1;
return size;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e8fa12c7ad5b..32567a5bb8d3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -652,7 +652,7 @@ void cpu_detect(struct cpuinfo_x86 *c)
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
c->x86 = (tfms >> 8) & 0xf;
c->x86_model = (tfms >> 4) & 0xf;
- c->x86_mask = tfms & 0xf;
+ c->x86_stepping = tfms & 0xf;
if (c->x86 == 0xf)
c->x86 += (tfms >> 20) & 0xff;
@@ -912,9 +912,30 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
{}
};
-static bool __init cpu_matches(unsigned long which)
+#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
+ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
+ INTEL_FAM6_##model, steppings, \
+ X86_FEATURE_ANY, issues)
+
+#define SRBDS BIT(0)
+
+static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
+ VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0xC), SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0xD), SRBDS),
+ {}
+};
+
+static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which)
{
- const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);
+ const struct x86_cpu_id *m = x86_match_cpu(table);
return m && !!(m->driver_data & which);
}
@@ -934,29 +955,32 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
u64 ia32_cap = x86_read_arch_cap_msr();
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
- if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
+ !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
- if (cpu_matches(NO_SPECULATION))
+ if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
return;
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
- if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
+ if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
+ !(ia32_cap & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
if (ia32_cap & ARCH_CAP_IBRS_ALL)
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
- if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) {
+ if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
+ !(ia32_cap & ARCH_CAP_MDS_NO)) {
setup_force_cpu_bug(X86_BUG_MDS);
- if (cpu_matches(MSBDS_ONLY))
+ if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
}
- if (!cpu_matches(NO_SWAPGS))
+ if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS))
setup_force_cpu_bug(X86_BUG_SWAPGS);
/*
@@ -974,7 +998,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
setup_force_cpu_bug(X86_BUG_TAA);
- if (cpu_matches(NO_MELTDOWN))
+ /*
+ * SRBDS affects CPUs which support RDRAND or RDSEED and are listed
+ * in the vulnerability blacklist.
+ */
+ if ((cpu_has(c, X86_FEATURE_RDRAND) ||
+ cpu_has(c, X86_FEATURE_RDSEED)) &&
+ cpu_matches(cpu_vuln_blacklist, SRBDS))
+ setup_force_cpu_bug(X86_BUG_SRBDS);
+
+ if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
/* Rogue Data Cache Load? No! */
@@ -983,7 +1016,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
- if (cpu_matches(NO_L1TF))
+ if (cpu_matches(cpu_vuln_whitelist, NO_L1TF))
return;
setup_force_cpu_bug(X86_BUG_L1TF);
@@ -1157,7 +1190,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
c->loops_per_jiffy = loops_per_jiffy;
c->x86_cache_size = 0;
c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
+ c->x86_model = c->x86_stepping = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
@@ -1327,6 +1360,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
#endif
mtrr_ap_init();
x86_spec_ctrl_setup_ap();
+ update_srbds_msr();
}
struct msr_range {
@@ -1403,8 +1437,8 @@ void print_cpu_info(struct cpuinfo_x86 *c)
printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
- if (c->x86_mask || c->cpuid_level >= 0)
- printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
+ if (c->x86_stepping || c->cpuid_level >= 0)
+ pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
else
printk(KERN_CONT ")\n");
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index c42cc1acd668..f2eca5632f7d 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -64,6 +64,7 @@ extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
extern void x86_spec_ctrl_setup_ap(void);
+extern void update_srbds_msr(void);
extern u64 x86_read_arch_cap_msr(void);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 151625a83d9e..bc90e879998c 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -212,7 +212,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
/* common case step number/rev -- exceptions handled below */
c->x86_model = (dir1 >> 4) + 1;
- c->x86_mask = dir1 & 0xf;
+ c->x86_stepping = dir1 & 0xf;
/* Now cook; the original recipe is by Channing Corn, from Cyrix.
* We do the same thing for each generation: we work out
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7beef3da5904..cb73d16d540c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -80,7 +80,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
if (c->x86_model == spectre_bad_microcodes[i].model &&
- c->x86_mask == spectre_bad_microcodes[i].stepping)
+ c->x86_stepping == spectre_bad_microcodes[i].stepping)
return (c->microcode <= spectre_bad_microcodes[i].microcode);
}
return false;
@@ -130,7 +130,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
* need the microcode to have already been loaded... so if it is
* not, recommend a BIOS update and disable large pages.
*/
- if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
+ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
c->microcode < 0x20e) {
printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
clear_cpu_cap(c, X86_FEATURE_PSE);
@@ -146,7 +146,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
/* CPUID workaround for 0F33/0F34 CPU */
if (c->x86 == 0xF && c->x86_model == 0x3
- && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
+ && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
c->x86_phys_bits = 36;
/*
@@ -246,8 +246,8 @@ int ppro_with_ram_bug(void)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask < 8) {
- printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+ boot_cpu_data.x86_stepping < 8) {
+ pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
return 1;
}
return 0;
@@ -263,7 +263,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c)
* Mask B, Pentium, but not Pentium MMX
*/
if (c->x86 == 5 &&
- c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
c->x86_model <= 3) {
/*
* Remember we have B step Pentia with bugs
@@ -306,7 +306,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
* model 3 mask 3
*/
- if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+ if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
clear_cpu_cap(c, X86_FEATURE_SEP);
/*
@@ -324,7 +324,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* P4 Xeon errata 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
*/
- if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
+ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
if (msr_set_bit(MSR_IA32_MISC_ENABLE,
MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
> 0) {
@@ -340,7 +340,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* Specification Update").
*/
if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
- (c->x86_mask < 0x6 || c->x86_mask == 0xb))
+ (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
@@ -555,7 +555,7 @@ static void init_intel(struct cpuinfo_x86 *c)
case 6:
if (l2 == 128)
p = "Celeron (Mendocino)";
- else if (c->x86_mask == 0 || c->x86_mask == 5)
+ else if (c->x86_stepping == 0 || c->x86_stepping == 5)
p = "Celeron-A";
break;
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index fbb5e90557a5..a207aaaf78db 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -33,13 +33,18 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
const struct x86_cpu_id *m;
struct cpuinfo_x86 *c = &boot_cpu_data;
- for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
+ for (m = match;
+ m->vendor | m->family | m->model | m->steppings | m->feature;
+ m++) {
if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
continue;
if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
continue;
if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
continue;
+ if (m->steppings != X86_STEPPING_ANY &&
+ !(BIT(c->x86_stepping) & m->steppings))
+ continue;
if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
continue;
return m;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index afaf648386e9..d4c3a30a7b33 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -132,51 +132,6 @@ load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
}
}
-/*
- * Given CPU signature and a microcode patch, this function finds if the
- * microcode patch has matching family and model with the CPU.
- */
-static enum ucode_state
-matching_model_microcode(struct microcode_header_intel *mc_header,
- unsigned long sig)
-{
- unsigned int fam, model;
- unsigned int fam_ucode, model_ucode;
- struct extended_sigtable *ext_header;
- unsigned long total_size = get_totalsize(mc_header);
- unsigned long data_size = get_datasize(mc_header);
- int ext_sigcount, i;
- struct extended_signature *ext_sig;
-
- fam = __x86_family(sig);
- model = x86_model(sig);
-
- fam_ucode = __x86_family(mc_header->sig);
- model_ucode = x86_model(mc_header->sig);
-
- if (fam == fam_ucode && model == model_ucode)
- return UCODE_OK;
-
- /* Look for ext. headers: */
- if (total_size <= data_size + MC_HEADER_SIZE)
- return UCODE_NFOUND;
-
- ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE;
- ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
- ext_sigcount = ext_header->count;
-
- for (i = 0; i < ext_sigcount; i++) {
- fam_ucode = __x86_family(ext_sig->sig);
- model_ucode = x86_model(ext_sig->sig);
-
- if (fam == fam_ucode && model == model_ucode)
- return UCODE_OK;
-
- ext_sig++;
- }
- return UCODE_NFOUND;
-}
-
static int
save_microcode(struct mc_saved_data *mc_saved_data,
struct microcode_intel **mc_saved_src,
@@ -321,8 +276,8 @@ get_matching_model_microcode(int cpu, unsigned long start,
* the platform, we need to find and save microcode patches
* with the same family and model as the BSP.
*/
- if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
- UCODE_OK) {
+ if (!find_matching_signature(mc_header, uci->cpu_sig.sig,
+ uci->cpu_sig.pf)) {
ucode_ptr += mc_size;
continue;
}
@@ -1013,7 +968,7 @@ static bool is_blacklisted(unsigned int cpu)
*/
if (c->x86 == 6 &&
c->x86_model == 79 &&
- c->x86_mask == 0x01 &&
+ c->x86_stepping == 0x01 &&
llc_size_per_core > 2621440 &&
c->microcode < 0x0b000021) {
pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
@@ -1036,7 +991,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
return UCODE_NFOUND;
sprintf(name, "intel-ucode/%02x-%02x-%02x",
- c->x86, c->x86_model, c->x86_mask);
+ c->x86, c->x86_model, c->x86_stepping);
if (request_firmware_direct(&firmware, name, device)) {
pr_debug("data file %s load failed\n", name);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 136ae86f4f5f..e2fa0fcbaa69 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -166,9 +166,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
*repeat = 0;
*uniform = 1;
- /* Make end inclusive instead of exclusive */
- end--;
-
prev_match = MTRR_TYPE_INVALID;
for (i = 0; i < num_var_ranges; ++i) {
unsigned short start_state, end_state, inclusive;
@@ -260,6 +257,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
int repeat;
u64 partial_end;
+ /* Make end inclusive instead of exclusive */
+ end--;
+
if (!mtrr_state_set)
return MTRR_TYPE_INVALID;
@@ -860,7 +860,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
*/
if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask <= 7) {
+ boot_cpu_data.x86_stepping <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 49bd700d9b7f..be3050f23536 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -699,8 +699,8 @@ void __init mtrr_bp_init(void)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 0xF &&
boot_cpu_data.x86_model == 0x3 &&
- (boot_cpu_data.x86_mask == 0x3 ||
- boot_cpu_data.x86_mask == 0x4))
+ (boot_cpu_data.x86_stepping == 0x3 ||
+ boot_cpu_data.x86_stepping == 0x4))
phys_addr = 36;
size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fbf2edc3eb35..851fbdb99767 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1550,6 +1550,7 @@ static void __init filter_events(struct attribute **attrs)
{
struct device_attribute *d;
struct perf_pmu_events_attr *pmu_attr;
+ int offset = 0;
int i, j;
for (i = 0; attrs[i]; i++) {
@@ -1558,7 +1559,7 @@ static void __init filter_events(struct attribute **attrs)
/* str trumps id */
if (pmu_attr->event_str)
continue;
- if (x86_pmu.event_map(i))
+ if (x86_pmu.event_map(i + offset))
continue;
for (j = i; attrs[j]; j++)
@@ -1566,6 +1567,14 @@ static void __init filter_events(struct attribute **attrs)
/* Check the shifted attr. */
i--;
+
+ /*
+ * event_map() is index based, the attrs array is organized
+ * by increasing event index. If we shift the events, then
+ * we need to compensate for the event_map(), otherwise
+ * we are looking up the wrong event in the map
+ */
+ offset++;
}
}
@@ -1992,6 +2001,7 @@ static int x86_pmu_event_init(struct perf_event *event)
if (err) {
if (event->destroy)
event->destroy(event);
+ event->destroy = NULL;
}
if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
index 97242a9242bd..ec0bfbab7265 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -80,12 +80,12 @@ static struct attribute_group amd_iommu_format_group = {
* sysfs events attributes
*---------------------------------------------*/
struct amd_iommu_event_desc {
- struct kobj_attribute attr;
+ struct device_attribute attr;
const char *event;
};
-static ssize_t _iommu_event_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+static ssize_t _iommu_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct amd_iommu_event_desc *event =
container_of(attr, struct amd_iommu_event_desc, attr);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3572434a73cb..d973c079e97c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1937,7 +1937,8 @@ intel_bts_constraints(struct perf_event *event)
static int intel_alt_er(int idx, u64 config)
{
- int alt_idx;
+ int alt_idx = idx;
+
if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
return idx;
@@ -3051,7 +3052,7 @@ static int intel_snb_pebs_broken(int cpu)
break;
case 45: /* SNB-EP */
- switch (cpu_data(cpu).x86_mask) {
+ switch (cpu_data(cpu).x86_stepping) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 2cdae69d7e0b..09058ad9816c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -1017,7 +1017,7 @@ void __init intel_pmu_lbr_init_atom(void)
* on PMU interrupt
*/
if (boot_cpu_data.x86_model == 28
- && boot_cpu_data.x86_mask < 10) {
+ && boot_cpu_data.x86_stepping < 10) {
pr_cont("LBR disabled due to erratum");
return;
}
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 7c1a0c07b607..507e2e319f52 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -233,7 +233,7 @@ static __initconst const struct x86_pmu p6_pmu = {
static __init void p6_pmu_rdpmc_quirk(void)
{
- if (boot_cpu_data.x86_mask < 9) {
+ if (boot_cpu_data.x86_stepping < 9) {
/*
* PPro erratum 26; fixed in stepping 9 and above.
*/
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 935225c0375f..c4f772d3f35c 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -70,8 +70,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
c->x86_model,
c->x86_model_id[0] ? c->x86_model_id : "unknown");
- if (c->x86_mask || c->cpuid_level >= 0)
- seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+ if (c->x86_stepping || c->cpuid_level >= 0)
+ seq_printf(m, "stepping\t: %d\n", c->x86_stepping);
else
seq_puts(m, "stepping\t: unknown\n");
if (c->microcode)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2c1910f6717e..a6d623e43d62 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/overflow.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -572,7 +573,7 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
struct crash_memmap_data cmd;
struct crash_mem *cmem;
- cmem = vzalloc(sizeof(struct crash_mem));
+ cmem = vzalloc(struct_size(cmem, ranges, 1));
if (!cmem)
return -ENOMEM;
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 31fad2cbd734..9a1489b92782 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -262,15 +262,23 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
return 0;
}
- if (!access_ok(VERIFY_READ, buf, size))
+ if (!access_ok(VERIFY_READ, buf, size)) {
+ fpu__clear(fpu);
return -EACCES;
+ }
fpu__activate_curr(fpu);
- if (!static_cpu_has(X86_FEATURE_FPU))
- return fpregs_soft_set(current, NULL,
- 0, sizeof(struct user_i387_ia32_struct),
- NULL, buf) != 0;
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
+ int ret = fpregs_soft_set(current, NULL, 0,
+ sizeof(struct user_i387_ia32_struct),
+ NULL, buf);
+
+ if (ret)
+ fpu__clear(fpu);
+
+ return ret != 0;
+ }
if (use_xsave()) {
struct _fpx_sw_bytes fx_sw_user;
@@ -317,10 +325,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
}
+ local_bh_disable();
fpu->fpstate_active = 1;
- preempt_disable();
fpu__restore(fpu);
- preempt_enable();
+ local_bh_enable();
return err;
} else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 3fa200ecca62..1ff1adbc843b 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -293,12 +293,31 @@ static void __init setup_xstate_comp(void)
}
/*
+ * All supported features have either init state all zeros or are
+ * handled in setup_init_fpu() individually. This is an explicit
+ * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
+ * newly added supported features at build time and make people
+ * actually look at the init state for the new feature.
+ */
+#define XFEATURES_INIT_FPSTATE_HANDLED \
+ (XFEATURE_MASK_FP | \
+ XFEATURE_MASK_SSE | \
+ XFEATURE_MASK_YMM | \
+ XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM | \
+ XFEATURE_MASK_BNDREGS | \
+ XFEATURE_MASK_BNDCSR)
+
+/*
* setup the xstate image representing the init state
*/
static void __init setup_init_fpu_buf(void)
{
static int on_boot_cpu = 1;
+ BUILD_BUG_ON(XCNTXT_MASK != XFEATURES_INIT_FPSTATE_HANDLED);
+
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -319,10 +338,22 @@ static void __init setup_init_fpu_buf(void)
copy_kernel_to_xregs_booting(&init_fpstate.xsave);
/*
- * Dump the init state again. This is to identify the init state
- * of any feature which is not represented by all zero's.
+ * All components are now in init state. Read the state back so
+ * that init_fpstate contains all non-zero init state. This only
+ * works with XSAVE, but not with XSAVEOPT and XSAVES because
+ * those use the init optimization which skips writing data for
+ * components in init state.
+ *
+ * XSAVE could be used, but that would require to reshuffle the
+ * data when XSAVES is available because XSAVES uses xstate
+ * compaction. But doing so is a pointless exercise because most
+ * components have an all zeros init state except for the legacy
+ * ones (FP and SSE). Those can be saved with FXSAVE into the
+ * legacy area. Adding new features requires to ensure that init
+ * state is all zeroes or if not to add the necessary handling
+ * here.
*/
- copy_xregs_to_kernel_booting(&init_fpstate.xsave);
+ fxsave(&init_fpstate.fxsave);
}
static int xfeature_is_supervisor(int xfeature_nr)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 1c0b49fd6365..10139b8f1e53 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -34,7 +34,7 @@
#define X86 new_cpu_data+CPUINFO_x86
#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor
#define X86_MODEL new_cpu_data+CPUINFO_x86_model
-#define X86_MASK new_cpu_data+CPUINFO_x86_mask
+#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping
#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math
#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level
#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
@@ -440,7 +440,7 @@ enable_paging:
shrb $4,%al
movb %al,X86_MODEL
andb $0x0f,%cl # mask mask revision
- movb %cl,X86_MASK
+ movb %cl,X86_STEPPING
movl %edx,X86_CAPABILITY
is486:
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 4e3b8a587c88..01a1ab8483ac 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -204,7 +204,7 @@ spurious_8259A_irq:
* lets ACK and report it. [once per IRQ]
*/
if (!(spurious_irq_mask & irqmask)) {
- printk(KERN_DEBUG
+ printk_deferred(KERN_DEBUG
"spurious 8259A interrupt: IRQ%d.\n", irq);
spurious_irq_mask |= irqmask;
}
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 1d41f50f3558..f6a27e6981df 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -211,8 +211,7 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
/* Copying screen_info will do? */
- memcpy(&params->screen_info, &boot_params.screen_info,
- sizeof(struct screen_info));
+ memcpy(&params->screen_info, &screen_info, sizeof(struct screen_info));
/* Fill in memsize later */
params->screen_info.ext_mem_k = 0;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 7c48aa03fe77..ebd4da00a56e 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -750,6 +750,13 @@ __visible __used void *trampoline_handler(struct pt_regs *regs)
void *frame_pointer;
bool skipped = false;
+ /*
+ * Set a dummy kprobe for avoiding kretprobe recursion.
+ * Since kretprobe never run in kprobe handler, kprobe must not
+ * be running at this point.
+ */
+ kprobe_busy_begin();
+
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
@@ -825,10 +832,9 @@ __visible __used void *trampoline_handler(struct pt_regs *regs)
orig_ret_address = (unsigned long)ri->ret_addr;
if (ri->rp && ri->rp->handler) {
__this_cpu_write(current_kprobe, &ri->rp->kp);
- get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
- __this_cpu_write(current_kprobe, NULL);
+ __this_cpu_write(current_kprobe, &kprobe_busy);
}
recycle_rp_inst(ri, &empty_rp);
@@ -844,6 +850,8 @@ __visible __used void *trampoline_handler(struct pt_regs *regs)
kretprobe_hash_unlock(current, &flags);
+ kprobe_busy_end();
+
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
@@ -1006,6 +1014,11 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* So clear it by resetting the current kprobe:
*/
regs->flags &= ~X86_EFLAGS_TF;
+ /*
+ * Since the single step (trap) has been cancelled,
+ * we need to restore BTF here.
+ */
+ restore_btf();
/*
* If the TF flag was set before the kprobe hit,
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 94779f66bf49..6f0d340594ca 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -124,6 +124,7 @@ int apply_relocate(Elf32_Shdr *sechdrs,
*location += sym->st_value;
break;
case R_386_PC32:
+ case R_386_PLT32:
/* Add the value, subtract its position */
*location += sym->st_value - (uint32_t)location;
break;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 30ca7607cbbb..c1cccfa900de 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -407,7 +407,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
processor.cpuflag = CPU_ENABLED;
processor.cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping;
processor.featureflag = boot_cpu_data.x86_capability[0];
processor.reserved[0] = 0;
processor.reserved[1] = 0;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 632195b41688..2cd05f34c0b6 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -168,9 +168,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-#ifdef CONFIG_X86_32
- type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
-#endif
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
/* If operation requires a jmp, then jmp */
@@ -226,7 +223,6 @@ static u64 native_steal_clock(int cpu)
/* These are in entry.S */
extern void native_iret(void);
-extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
@@ -385,9 +381,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.load_sp0 = native_load_sp0,
-#if defined(CONFIG_X86_32)
- .irq_enable_sysexit = native_irq_enable_sysexit,
-#endif
#ifdef CONFIG_X86_64
#ifdef CONFIG_IA32_EMULATION
.usergs_sysret32 = native_usergs_sysret32,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index c89f50a76e97..158dc0650d5d 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_cpu_ops, iret);
- PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 0677bf8d3a42..03c6a8cf33c4 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -12,7 +12,6 @@ DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 64090c943f05..46427f2955fa 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -333,28 +333,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
u64 msr = x86_spec_ctrl_base;
bool updmsr = false;
- /*
- * If TIF_SSBD is different, select the proper mitigation
- * method. Note that if SSBD mitigation is disabled or permanentely
- * enabled this branch can't be taken because nothing can set
- * TIF_SSBD.
- */
- if (tif_diff & _TIF_SSBD) {
- if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+ /* Handle change of TIF_SSBD depending on the mitigation method. */
+ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+ if (tif_diff & _TIF_SSBD)
amd_set_ssb_virt_state(tifn);
- } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+ } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+ if (tif_diff & _TIF_SSBD)
amd_set_core_ssb_state(tifn);
- } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- static_cpu_has(X86_FEATURE_AMD_SSBD)) {
- msr |= ssbd_tif_to_spec_ctrl(tifn);
- updmsr = true;
- }
+ } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ updmsr |= !!(tif_diff & _TIF_SSBD);
+ msr |= ssbd_tif_to_spec_ctrl(tifn);
}
- /*
- * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
- * otherwise avoid the MSR write.
- */
+ /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */
if (IS_ENABLED(CONFIG_SMP) &&
static_branch_unlikely(&switch_to_cond_stibp)) {
updmsr |= !!(tif_diff & _TIF_SPEC_IB);
diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
index 898e97cf6629..320ab978fb1f 100644
--- a/arch/x86/kernel/process.h
+++ b/arch/x86/kernel/process.h
@@ -19,7 +19,7 @@ static inline void switch_to_extra(struct task_struct *prev,
if (IS_ENABLED(CONFIG_SMP)) {
/*
* Avoid __switch_to_xtra() invocation when conditional
- * STIPB is disabled and the only different bit is
+ * STIBP is disabled and the only different bit is
* TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not
* in the TIF_WORK_CTXSW masks.
*/
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 618565fecb1c..1a79d451cd34 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -128,7 +128,7 @@ void release_thread(struct task_struct *dead_task)
if (dead_task->mm->context.ldt) {
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm,
- dead_task->mm->context.ldt,
+ dead_task->mm->context.ldt->entries,
dead_task->mm->context.ldt->size);
BUG();
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9a16932c7258..143c06f84596 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -162,6 +162,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
},
},
+ { /* Handle problems with rebooting on Apple MacBook6,1 */
+ .callback = set_pci_reboot,
+ .ident = "Apple MacBook6,1",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"),
+ },
+ },
{ /* Handle problems with rebooting on Apple MacBookPro5 */
.callback = set_pci_reboot,
.ident = "Apple MacBookPro5",
@@ -329,10 +337,11 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
},
{ /* Handle problems with rebooting on the OptiPlex 990. */
.callback = set_pci_reboot,
- .ident = "Dell OptiPlex 990",
+ .ident = "Dell OptiPlex 990 BIOS A0x",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
+ DMI_MATCH(DMI_BIOS_VERSION, "A0"),
},
},
{ /* Handle problems with rebooting on Dell 300's */
@@ -410,6 +419,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
},
},
+ { /* PCIe Wifi card isn't detected after reboot otherwise */
+ .callback = set_pci_reboot,
+ .ident = "Zotac ZBOX CI327 nano",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "NA"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ZBOX-CI327NANO-GS-01"),
+ },
+ },
+
/* Sony */
{ /* Handle problems with rebooting on Sony VGN-Z540N */
.callback = set_bios_reboot,
@@ -471,29 +489,20 @@ static void emergency_vmx_disable_all(void)
local_irq_disable();
/*
- * We need to disable VMX on all CPUs before rebooting, otherwise
- * we risk hanging up the machine, because the CPU ignore INIT
- * signals when VMX is enabled.
- *
- * We can't take any locks and we may be on an inconsistent
- * state, so we use NMIs as IPIs to tell the other CPUs to disable
- * VMX and halt.
+ * Disable VMX on all CPUs before rebooting, otherwise we risk hanging
+ * the machine, because the CPU blocks INIT when it's in VMX root.
*
- * For safety, we will avoid running the nmi_shootdown_cpus()
- * stuff unnecessarily, but we don't have a way to check
- * if other CPUs have VMX enabled. So we will call it only if the
- * CPU we are running on has VMX enabled.
+ * We can't take any locks and we may be on an inconsistent state, so
+ * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt.
*
- * We will miss cases where VMX is not enabled on all CPUs. This
- * shouldn't do much harm because KVM always enable VMX on all
- * CPUs anyway. But we can miss it on the small window where KVM
- * is still enabling VMX.
+ * Do the NMI shootdown even if VMX if off on _this_ CPU, as that
+ * doesn't prevent a different CPU from being in VMX root operation.
*/
- if (cpu_has_vmx() && cpu_vmx_enabled()) {
- /* Disable VMX on this CPU. */
- cpu_vmxoff();
+ if (cpu_has_vmx()) {
+ /* Safely force _this_ CPU out of VMX root operation. */
+ __cpu_emergency_vmxoff();
- /* Halt and disable VMX on the other CPUs */
+ /* Halt and exit VMX root operation on the other CPUs. */
nmi_shootdown_cpus(vmxoff_nmi);
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c017f1c71560..0512af683871 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -243,6 +243,14 @@ static void notrace start_secondary(void *unused)
wmb();
cpu_startup_entry(CPUHP_ONLINE);
+
+ /*
+ * Prevent tail call to cpu_startup_entry() because the stack protector
+ * guard has been changed a couple of function calls up, in
+ * boot_init_stack_canary() and must not be checked before tail calling
+ * another function.
+ */
+ prevent_tail_call_optimization();
}
void __init smp_store_boot_cpu_info(void)
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 590c8fd2ed9b..700b8e857025 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
#include <asm/hpet.h>
#include <asm/time.h>
-#ifdef CONFIG_X86_64
-__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
-#endif
-
unsigned long profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 8c38784cf992..60ccfa4c2768 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -268,10 +268,11 @@ static volatile u32 good_2byte_insns[256 / 32] = {
static bool is_prefix_bad(struct insn *insn)
{
+ insn_byte_t p;
int i;
- for (i = 0; i < insn->prefixes.nbytes; i++) {
- switch (insn->prefixes.bytes[i]) {
+ for_each_insn_prefix(insn, i, p) {
+ switch (p) {
case 0x26: /* INAT_PFX_ES */
case 0x2E: /* INAT_PFX_CS */
case 0x36: /* INAT_PFX_DS */
@@ -711,6 +712,7 @@ static struct uprobe_xol_ops branch_xol_ops = {
static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
{
u8 opc1 = OPCODE1(insn);
+ insn_byte_t p;
int i;
switch (opc1) {
@@ -741,8 +743,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
* Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
* No one uses these insns, reject any branch insns with such prefix.
*/
- for (i = 0; i < insn->prefixes.nbytes; i++) {
- if (insn->prefixes.bytes[i] == 0x66)
+ for_each_insn_prefix(insn, i, p) {
+ if (p == 0x66)
return -ENOTSUPP;
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 17e1e60b6b40..b05da220ea0a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -34,13 +34,13 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
#ifdef CONFIG_X86_32
OUTPUT_ARCH(i386)
ENTRY(phys_startup_32)
-jiffies = jiffies_64;
#else
OUTPUT_ARCH(i386:x86-64)
ENTRY(phys_startup_64)
-jiffies_64 = jiffies;
#endif
+jiffies = jiffies_64;
+
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
/*
* On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 13bda3fcf42b..b60ffd1b3ae2 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -611,8 +611,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
unsigned phys_as = entry->eax & 0xff;
- if (!g_phys_as)
+ /*
+ * Use bare metal's MAXPHADDR if the CPU doesn't report guest
+ * MAXPHYADDR separately, or if TDP (NPT) is disabled, as the
+ * guest version "applies only to guests using nested paging".
+ */
+ if (!g_phys_as || !tdp_enabled)
g_phys_as = phys_as;
+
entry->eax = g_phys_as | (virt_as << 8);
entry->edx = 0;
/*
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 466028623e1a..827d54a5126e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3524,7 +3524,7 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt)
u64 tsc_aux = 0;
if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
- return emulate_gp(ctxt, 0);
+ return emulate_ud(ctxt);
ctxt->dst.val = tsc_aux;
return X86EMUL_CONTINUE;
}
@@ -3922,6 +3922,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
+{
+ /* emulating clflushopt regardless of cpuid */
+ return X86EMUL_CONTINUE;
+}
+
static int em_movsxd(struct x86_emulate_ctxt *ctxt)
{
ctxt->dst.val = (s32) ctxt->src.val;
@@ -4411,7 +4417,7 @@ static const struct opcode group11[] = {
};
static const struct gprefix pfx_0f_ae_7 = {
- I(SrcMem | ByteOp, em_clflush), N, N, N,
+ I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
};
static const struct group_dual group15 = { {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a5b533aea958..2ff0fe32c015 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3679,7 +3679,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
- nonleaf_bit8_rsvd | gbpages_bit_rsvd |
+ gbpages_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 822829f00590..04890ac518d0 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -29,7 +29,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = {
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
- [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
+ [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};
/* mapping between fixed pmc index and intel_arch_events array */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9fc536657492..77bee73faebc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2250,7 +2250,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
dst->iopm_base_pa = from->iopm_base_pa;
dst->msrpm_base_pa = from->msrpm_base_pa;
dst->tsc_offset = from->tsc_offset;
- dst->asid = from->asid;
+ /* asid not copied, it is handled manually for svm->vmcb. */
dst->tlb_ctl = from->tlb_ctl;
dst->int_ctl = from->int_ctl;
dst->int_vector = from->int_vector;
@@ -2564,7 +2564,11 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
svm->nested.intercept = nested_vmcb->control.intercept;
svm_flush_tlb(&svm->vcpu);
- svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
+ svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl &
+ (V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK);
+
+ svm->vmcb->control.int_ctl |= V_INTR_MASKING_MASK;
+
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
else
@@ -2927,7 +2931,7 @@ static int cr_interception(struct vcpu_svm *svm)
err = 0;
if (cr >= 16) { /* mov to cr */
cr -= 16;
- val = kvm_register_read(&svm->vcpu, reg);
+ val = kvm_register_readl(&svm->vcpu, reg);
switch (cr) {
case 0:
if (!check_selective_cr0_intercepted(svm, val))
@@ -2972,7 +2976,7 @@ static int cr_interception(struct vcpu_svm *svm)
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
- kvm_register_write(&svm->vcpu, reg, val);
+ kvm_register_writel(&svm->vcpu, reg, val);
}
kvm_complete_insn_gp(&svm->vcpu, err);
@@ -3004,13 +3008,13 @@ static int dr_interception(struct vcpu_svm *svm)
if (dr >= 16) { /* mov to DRn */
if (!kvm_require_dr(&svm->vcpu, dr - 16))
return 1;
- val = kvm_register_read(&svm->vcpu, reg);
+ val = kvm_register_readl(&svm->vcpu, reg);
kvm_set_dr(&svm->vcpu, dr - 16, val);
} else {
if (!kvm_require_dr(&svm->vcpu, dr))
return 1;
kvm_get_dr(&svm->vcpu, dr, &val);
- kvm_register_write(&svm->vcpu, reg, val);
+ kvm_register_writel(&svm->vcpu, reg, val);
}
skip_emulated_instruction(&svm->vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2634b4556202..6646edaa5123 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5441,7 +5441,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
*/
static void kvm_machine_check(void)
{
-#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
+#if defined(CONFIG_X86_MCE)
struct pt_regs regs = {
.cs = 3, /* Fake ring 3 no matter what the guest ran on */
.flags = X86_EFLAGS_IF,
@@ -7844,7 +7844,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return true;
}
- switch (exit_reason) {
+ switch ((u16)exit_reason) {
case EXIT_REASON_EXCEPTION_NMI:
if (is_nmi(intr_info))
return false;
@@ -8235,6 +8235,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
exit_reason != EXIT_REASON_EPT_VIOLATION &&
exit_reason != EXIT_REASON_PML_FULL &&
+ exit_reason != EXIT_REASON_APIC_ACCESS &&
exit_reason != EXIT_REASON_TASK_SWITCH)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3adc255e69cb..8dce61ca934b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2162,7 +2162,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return kvm_mtrr_set_msr(vcpu, msr, data);
case MSR_IA32_APICBASE:
return kvm_set_apic_base(vcpu, msr_info);
- case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+ case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
return kvm_x2apic_msr_write(vcpu, msr, data);
case MSR_IA32_TSCDEADLINE:
kvm_set_lapic_tscdeadline_msr(vcpu, data);
@@ -2172,6 +2172,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated) {
s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
adjust_tsc_offset_guest(vcpu, adj);
+ /* Before back to guest, tsc_timestamp must be adjusted
+ * as well, otherwise guest's percpu pvclock time could jump.
+ */
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
}
vcpu->arch.ia32_tsc_adjust_msr = data;
}
@@ -2432,7 +2436,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_APICBASE:
msr_info->data = kvm_get_apic_base(vcpu);
break;
- case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+ case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
break;
case MSR_IA32_TSCDEADLINE:
@@ -2941,7 +2945,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
unsigned bank_num = mcg_cap & 0xff, bank;
r = -EINVAL;
- if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
+ if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
goto out;
if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
goto out;
@@ -4013,10 +4017,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&u.ps, argp, sizeof u.ps))
goto out;
+ mutex_lock(&kvm->lock);
r = -ENXIO;
if (!kvm->arch.vpit)
- goto out;
+ goto set_pit_out;
r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
+set_pit_out:
+ mutex_unlock(&kvm->lock);
break;
}
case KVM_GET_PIT2: {
@@ -4036,10 +4043,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
goto out;
+ mutex_lock(&kvm->lock);
r = -ENXIO;
if (!kvm->arch.vpit)
- goto out;
+ goto set_pit2_out;
r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
+set_pit2_out:
+ mutex_unlock(&kvm->lock);
break;
}
case KVM_REINJECT_CONTROL: {
@@ -6010,6 +6020,7 @@ void kvm_arch_exit(void)
unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+ cancel_work_sync(&pvclock_gtod_work);
#endif
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
@@ -6717,6 +6728,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(vcpu->arch.eff_db[3], 3);
set_debugreg(vcpu->arch.dr6, 6);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
+ } else if (unlikely(hw_breakpoint_active())) {
+ set_debugreg(0, 7);
}
kvm_x86_ops->run(vcpu);
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index 518532e6a3fa..8a3bc242c5e9 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -239,7 +239,7 @@ static void __wrmsr_safe_regs_on_cpu(void *info)
rv->err = wrmsr_safe_regs(rv->regs);
}
-int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
+int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
{
int err;
struct msr_regs_info rv;
@@ -252,7 +252,7 @@ int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
}
EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu);
-int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
+int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
{
int err;
struct msr_regs_info rv;
diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S
index d258f59564e1..3b40c98bbbd4 100644
--- a/arch/x86/math-emu/wm_sqrt.S
+++ b/arch/x86/math-emu/wm_sqrt.S
@@ -208,7 +208,7 @@ sqrt_stage_2_finish:
#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
- cmp $0xffffffff,FPU_fsqrt_arg_1
+ cmpl $0xffffffff,FPU_fsqrt_arg_1
jnz sqrt_stage_2_error
#endif /* PARANOID */
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 0f1c6fc3ddd8..47770ccab6d7 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -15,6 +15,7 @@
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/pgtable.h>
@@ -407,6 +408,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
} else
note_page(m, &st, __pgprot(0), 1);
+ cond_resched();
start++;
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f00eb52c16a6..17eb564901ca 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -109,8 +109,6 @@ __ref void *alloc_low_pages(unsigned int num)
} else {
pfn = pgt_buf_end;
pgt_buf_end += num;
- printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n",
- pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
}
for (i = 0; i < num; i++) {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d76ec9348cff..547d80fc76d7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1182,21 +1182,21 @@ int kern_addr_valid(unsigned long addr)
return 0;
pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ if (!pud_present(*pud))
return 0;
if (pud_large(*pud))
return pfn_valid(pud_pfn(*pud));
pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ if (!pmd_present(*pmd))
return 0;
if (pmd_large(*pmd))
return pfn_valid(pmd_pfn(*pmd));
pte = pte_offset_kernel(pmd, addr);
- if (pte_none(*pte))
+ if (!pte_present(*pte))
return 0;
return pfn_valid(pte_pfn(*pte));
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 0057a7accfb1..5448ad4d0703 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -385,7 +385,7 @@ static void enter_uniprocessor(void)
int cpu;
int err;
- if (downed_cpus == NULL &&
+ if (!cpumask_available(downed_cpus) &&
!alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
pr_notice("Failed to allocate mask\n");
goto out;
@@ -415,7 +415,7 @@ static void leave_uniprocessor(void)
int cpu;
int err;
- if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0)
+ if (!cpumask_available(downed_cpus) || cpumask_weight(downed_cpus) == 0)
return;
pr_notice("Re-enabling CPUs...\n");
for_each_cpu(cpu, downed_cpus) {
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3ed4753280aa..289518bb0e8d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -720,6 +720,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
}
free_page((unsigned long)pmd_sv);
+
+ pgtable_pmd_page_dtor(virt_to_page(pmd));
free_page((unsigned long)pmd);
return 1;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index dd9a861fd526..82f8cd0a3af9 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -145,6 +145,19 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_9));
}
+/*
+ * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64
+ * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte
+ * of encoding. al,cl,dl,bl have simpler encoding.
+ */
+static bool is_ereg_8l(u32 reg)
+{
+ return is_ereg(reg) ||
+ (1 << reg) & (BIT(BPF_REG_1) |
+ BIT(BPF_REG_2) |
+ BIT(BPF_REG_FP));
+}
+
/* add modifiers if 'reg' maps to x64 registers r8..r15 */
static u8 add_1mod(u8 byte, u32 reg)
{
@@ -731,9 +744,8 @@ st: if (is_imm8(insn->off))
/* STX: *(u8*)(dst_reg + off) = src_reg */
case BPF_STX | BPF_MEM | BPF_B:
/* emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ if (is_ereg(dst_reg) || is_ereg_8l(src_reg))
+ /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */
EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
else
EMIT1(0x88);
@@ -1026,7 +1038,16 @@ common_load:
}
if (image) {
- if (unlikely(proglen + ilen > oldproglen)) {
+ /*
+ * When populating the image, assert that:
+ *
+ * i) We do not write beyond the allocated space, and
+ * ii) addrs[i] did not change from the prior run, in order
+ * to validate assumptions made for computing branch
+ * displacements.
+ */
+ if (unlikely(proglen + ilen > oldproglen ||
+ proglen + ilen != addrs[i])) {
pr_err("bpf_jit_compile fatal error\n");
return -EFAULT;
}
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk
index fd1ab80be0de..a4cf678cf5c8 100644
--- a/arch/x86/tools/chkobjdump.awk
+++ b/arch/x86/tools/chkobjdump.awk
@@ -10,6 +10,7 @@ BEGIN {
/^GNU objdump/ {
verstr = ""
+ gsub(/\(.*\)/, "");
for (i = 3; i <= NF; i++)
if (match($(i), "^[0-9]")) {
verstr = $(i);
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5b6c8486a0be..d1c3f82c7882 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -839,9 +839,11 @@ static int do_reloc32(struct section *sec, Elf_Rel *rel, Elf_Sym *sym,
case R_386_PC32:
case R_386_PC16:
case R_386_PC8:
+ case R_386_PLT32:
/*
- * NONE can be ignored and PC relative relocations don't
- * need to be adjusted.
+ * NONE can be ignored and PC relative relocations don't need
+ * to be adjusted. Because sym must be defined, R_386_PLT32 can
+ * be treated the same way as R_386_PC32.
*/
break;
@@ -882,9 +884,11 @@ static int do_reloc_real(struct section *sec, Elf_Rel *rel, Elf_Sym *sym,
case R_386_PC32:
case R_386_PC16:
case R_386_PC8:
+ case R_386_PLT32:
/*
- * NONE can be ignored and PC relative relocations don't
- * need to be adjusted.
+ * NONE can be ignored and PC relative relocations don't need
+ * to be adjusted. Because sym must be defined, R_386_PLT32 can
+ * be treated the same way as R_386_PC32.
*/
break;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 82fd84d5e1aa..28725a6ed5de 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -861,8 +861,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
preempt_enable();
}
-static void xen_convert_trap_info(const struct desc_ptr *desc,
- struct trap_info *traps)
+static unsigned xen_convert_trap_info(const struct desc_ptr *desc,
+ struct trap_info *traps, bool full)
{
unsigned in, out, count;
@@ -872,17 +872,18 @@ static void xen_convert_trap_info(const struct desc_ptr *desc,
for (in = out = 0; in < count; in++) {
gate_desc *entry = (gate_desc*)(desc->address) + in;
- if (cvt_gate_to_trap(in, entry, &traps[out]))
+ if (cvt_gate_to_trap(in, entry, &traps[out]) || full)
out++;
}
- traps[out].address = 0;
+
+ return out;
}
void xen_copy_trap_info(struct trap_info *traps)
{
const struct desc_ptr *desc = this_cpu_ptr(&idt_desc);
- xen_convert_trap_info(desc, traps);
+ xen_convert_trap_info(desc, traps, true);
}
/* Load a new IDT into Xen. In principle this can be per-CPU, so we
@@ -892,6 +893,7 @@ static void xen_load_idt(const struct desc_ptr *desc)
{
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
+ unsigned out;
trace_xen_cpu_load_idt(desc);
@@ -899,7 +901,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
- xen_convert_trap_info(desc, traps);
+ out = xen_convert_trap_info(desc, traps, false);
+ memset(&traps[out], 0, sizeof(traps[0]));
xen_mc_flush();
if (HYPERVISOR_set_trap_table(traps))
@@ -1240,10 +1243,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.iret = xen_iret,
#ifdef CONFIG_X86_64
- .usergs_sysret32 = xen_sysret32,
.usergs_sysret64 = xen_sysret64,
-#else
- .irq_enable_sysexit = xen_sysexit,
#endif
.load_tr_desc = paravirt_nop,
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index cab9f766bb06..af0ebe18248a 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -623,8 +623,8 @@ int xen_alloc_p2m_entry(unsigned long pfn)
}
/* Expanded the p2m? */
- if (pfn > xen_p2m_last_pfn) {
- xen_p2m_last_pfn = pfn;
+ if (pfn >= xen_p2m_last_pfn) {
+ xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE);
HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
}
@@ -723,9 +723,12 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
for (i = 0; i < count; i++) {
unsigned long mfn, pfn;
+ struct gnttab_unmap_grant_ref unmap[2];
+ int rc;
/* Do not add to override if the map failed. */
- if (map_ops[i].status)
+ if (map_ops[i].status != GNTST_okay ||
+ (kmap_ops && kmap_ops[i].status != GNTST_okay))
continue;
if (map_ops[i].flags & GNTMAP_contains_pte) {
@@ -739,10 +742,46 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");
- if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
- ret = -ENOMEM;
- goto out;
+ if (likely(set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
+ continue;
+
+ /*
+ * Signal an error for this slot. This in turn requires
+ * immediate unmapping.
+ */
+ map_ops[i].status = GNTST_general_error;
+ unmap[0].host_addr = map_ops[i].host_addr,
+ unmap[0].handle = map_ops[i].handle;
+ map_ops[i].handle = ~0;
+ if (map_ops[i].flags & GNTMAP_device_map)
+ unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr;
+ else
+ unmap[0].dev_bus_addr = 0;
+
+ if (kmap_ops) {
+ kmap_ops[i].status = GNTST_general_error;
+ unmap[1].host_addr = kmap_ops[i].host_addr,
+ unmap[1].handle = kmap_ops[i].handle;
+ kmap_ops[i].handle = ~0;
+ if (kmap_ops[i].flags & GNTMAP_device_map)
+ unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr;
+ else
+ unmap[1].dev_bus_addr = 0;
}
+
+ /*
+ * Pre-populate both status fields, to be recognizable in
+ * the log message below.
+ */
+ unmap[0].status = 1;
+ unmap[1].status = 1;
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ unmap, 1 + !!kmap_ops);
+ if (rc || unmap[0].status != GNTST_okay ||
+ unmap[1].status != GNTST_okay)
+ pr_err_once("gnttab unmap failed: rc=%d st0=%d st1=%d\n",
+ rc, unmap[0].status, unmap[1].status);
}
out:
@@ -763,17 +802,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
unsigned long pfn = page_to_pfn(pages[i]);
- if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
+ if (mfn != INVALID_P2M_ENTRY && (mfn & FOREIGN_FRAME_BIT))
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ else
ret = -EINVAL;
- goto out;
- }
-
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
if (kunmap_ops)
ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
- kunmap_ops, count);
-out:
+ kunmap_ops, count) ?: ret;
+
return ret;
}
EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 29e50d1229bc..ee48506ca151 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -116,6 +116,7 @@ asmlinkage __visible void cpu_bringup_and_idle(int cpu)
#endif
cpu_bringup();
cpu_startup_entry(CPUHP_ONLINE);
+ prevent_tail_call_optimization();
}
static void xen_smp_intr_free(unsigned int cpu)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 85872a08994a..e9fc0f7df0da 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -301,10 +301,20 @@ void xen_init_lock_cpu(int cpu)
void xen_uninit_lock_cpu(int cpu)
{
+ int irq;
+
if (!xen_pvspin)
return;
- unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
+ /*
+ * When booting the kernel with 'mitigations=auto,nosmt', the secondary
+ * CPUs are not activated, and lock_kicker_irq is not initialized.
+ */
+ irq = per_cpu(lock_kicker_irq, cpu);
+ if (irq == -1)
+ return;
+
+ unbind_from_irqhandler(irq, NULL);
per_cpu(lock_kicker_irq, cpu) = -1;
kfree(per_cpu(irq_name, cpu));
per_cpu(irq_name, cpu) = NULL;
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index fd92a64d748e..feb6d40a0860 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -35,20 +35,6 @@ check_events:
ret
/*
- * We can't use sysexit directly, because we're not running in ring0.
- * But we can easily fake it up using iret. Assuming xen_sysexit is
- * jumped to with a standard stack frame, we can just strip it back to
- * a standard iret frame and use iret.
- */
-ENTRY(xen_sysexit)
- movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
- orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
- lea PT_EIP(%esp), %esp
-
- jmp xen_iret
-ENDPROC(xen_sysexit)
-
-/*
* This is run where a normal iret would be run, with the same stack setup:
* 8: eflags
* 4: cs
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 1399423f3418..4140b070f2e9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -139,9 +139,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
-#ifdef CONFIG_X86_32
-__visible void xen_sysexit(void);
-#endif
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
__visible void xen_adjust_exception_frame(void);