diff options
Diffstat (limited to 'arch/x86')
100 files changed, 757 insertions, 405 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 8b4d022ce0cb..c0045e3ad0f5 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -34,12 +34,13 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \ -DDISABLE_BRANCH_PROFILING \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ - -mno-mmx -mno-sse + -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4)) +REALMODE_CFLAGS += $(CLANG_FLAGS) export REALMODE_CFLAGS # BITS is used as extension for files which are available in a 32 bit @@ -61,6 +62,9 @@ endif KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow KBUILD_CFLAGS += $(call cc-option,-mno-avx,) +# Intel CET isn't enabled in the kernel +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) + ifeq ($(CONFIG_X86_32),y) BITS := 32 UTS_MACHINE := i386 diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index bf0c7b6b00c3..01eafd8aeec6 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -31,6 +31,8 @@ KBUILD_CFLAGS += -mno-mmx -mno-sse KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +# Disable relocation relaxation in case the link is not PIE. +KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 028be48c8839..a628b2474e6d 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -218,7 +218,6 @@ CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_EFI=y # CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set @@ -248,6 +247,7 @@ CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_MON=y +CONFIG_USB_XHCI_HCD=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_TT_NEWSCHED=y CONFIG_USB_OHCI_HCD=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index cb5b3ab5beec..649f7d604b12 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -212,7 +212,6 @@ CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_EFI=y # CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set @@ -242,6 +241,7 @@ CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_MON=y +CONFIG_USB_XHCI_HCD=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_TT_NEWSCHED=y CONFIG_USB_OHCI_HCD=y diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 575c9afeba9b..217b60246cbb 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -331,7 +331,8 @@ sysenter_past_esp: * Return back to the vDSO, which will pop ecx and edx. * Don't bother with DS and ES (they already contain __USER_DS). */ - ENABLE_INTERRUPTS_SYSEXIT + sti + sysexit .pushsection .fixup, "ax" 2: movl $0, PT_FS(%esp) @@ -554,11 +555,6 @@ ENTRY(native_iret) iret _ASM_EXTABLE(native_iret, iret_exc) END(native_iret) - -ENTRY(native_irq_enable_sysexit) - sti - sysexit -END(native_irq_enable_sysexit) #endif ENTRY(overflow) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 6b505d91cac5..eec6defbe733 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -57,7 +57,7 @@ ENDPROC(native_usergs_sysret64) .macro TRACE_IRQS_IRETQ #ifdef CONFIG_TRACE_IRQFLAGS - bt $9, EFLAGS(%rsp) /* interrupts off? */ + btl $9, EFLAGS(%rsp) /* interrupts off? */ jnc 1f TRACE_IRQS_ON 1: diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 94c18ebfd68c..fd51f638e4ab 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -92,7 +92,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) if (boot_cpu_data.x86 == 0x0F && boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86_model <= 0x05 && - boot_cpu_data.x86_mask < 0x0A) + boot_cpu_data.x86_stepping < 0x0A) return 1; else if (amd_e400_c1e_detected) return 1; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3328a37ddc75..34f11bc42d9b 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -168,16 +168,6 @@ static inline void disable_local_APIC(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC -/* - * Make previous memory operations globally visible before - * sending the IPI through x2apic wrmsr. We need a serializing instruction or - * mfence for this. - */ -static inline void x2apic_wrmsr_fence(void) -{ - asm volatile("mfence" : : : "memory"); -} - static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 3d1ec41ae09a..20370c6db74b 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -6,8 +6,6 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H -#include <asm/nospec-branch.h> - #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -33,7 +31,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -46,7 +43,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); } static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -59,7 +55,6 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -72,7 +67,6 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index afc2387323c9..47cb64dd319a 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -77,7 +77,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) */ static __always_inline int atomic_sub_and_test(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); } /** @@ -114,7 +114,7 @@ static __always_inline void atomic_dec(atomic_t *v) */ static __always_inline int atomic_dec_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); } /** @@ -127,7 +127,7 @@ static __always_inline int atomic_dec_and_test(atomic_t *v) */ static __always_inline int atomic_inc_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); } /** @@ -141,7 +141,7 @@ static __always_inline int atomic_inc_and_test(atomic_t *v) */ static __always_inline int atomic_add_negative(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 377fa50cc271..fbb9a82599ab 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -72,7 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) */ static inline int atomic64_sub_and_test(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); } /** @@ -111,7 +111,7 @@ static __always_inline void atomic64_dec(atomic64_t *v) */ static inline int atomic64_dec_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); } /** @@ -124,7 +124,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v) */ static inline int atomic64_inc_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); } /** @@ -138,7 +138,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v) */ static inline int atomic64_add_negative(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index b2a5bef74282..134d7ffc662e 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -119,4 +119,22 @@ do { \ #define smp_mb__before_atomic() do { } while (0) #define smp_mb__after_atomic() do { } while (0) +/* + * Make previous memory operations globally visible before + * a WRMSR. + * + * MFENCE makes writes visible, but only affects load/store + * instructions. WRMSR is unfortunately not a load/store + * instruction and is unaffected by MFENCE. The LFENCE ensures + * that the WRMSR is not reordered. + * + * Most WRMSRs are full serializing instructions themselves and + * do not require this barrier. This is only required for the + * IA32_TSC_DEADLINE and X2APIC MSRs. + */ +static inline void weak_wrmsr_fence(void) +{ + asm volatile("mfence; lfence" : : : "memory"); +} + #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index cfe3b954d5e4..390e323a4de9 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -77,7 +77,7 @@ set_bit(long nr, volatile unsigned long *addr) : "iq" ((u8)CONST_MASK(nr)) : "memory"); } else { - asm volatile(LOCK_PREFIX "bts %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); } } @@ -93,7 +93,7 @@ set_bit(long nr, volatile unsigned long *addr) */ static inline void __set_bit(long nr, volatile unsigned long *addr) { - asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); + asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory"); } /** @@ -114,7 +114,7 @@ clear_bit(long nr, volatile unsigned long *addr) : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)~CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX "btr %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr)); } @@ -136,7 +136,7 @@ static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) static inline void __clear_bit(long nr, volatile unsigned long *addr) { - asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr)); } /* @@ -168,7 +168,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) */ static inline void __change_bit(long nr, volatile unsigned long *addr) { - asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr)); } /** @@ -187,7 +187,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX "btc %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr)); } @@ -203,7 +203,8 @@ static inline void change_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_set_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), + *addr, "Ir", nr, "%0", c); } /** @@ -232,7 +233,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) { int oldbit; - asm("bts %2,%1\n\t" + asm(__ASM_SIZE(bts) " %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr)); @@ -249,7 +250,8 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), + *addr, "Ir", nr, "%0", c); } /** @@ -272,7 +274,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) { int oldbit; - asm volatile("btr %2,%1\n\t" + asm volatile(__ASM_SIZE(btr) " %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr)); @@ -284,7 +286,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) { int oldbit; - asm volatile("btc %2,%1\n\t" + asm volatile(__ASM_SIZE(btc) " %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); @@ -302,7 +304,8 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), + *addr, "Ir", nr, "%0", c); } static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) @@ -315,7 +318,7 @@ static inline int variable_test_bit(long nr, volatile const unsigned long *addr) { int oldbit; - asm volatile("bt %2,%1\n\t" + asm volatile(__ASM_SIZE(bt) " %2,%1\n\t" "sbb %0,%0" : "=r" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index ff501e511d91..b9473858c6b6 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -8,6 +8,33 @@ #include <linux/mod_devicetable.h> +#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) + +/** + * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * Backport version to keep the SRBDS pile consistant. No shorter variants + * required for this. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ + _steppings, _feature, _data) { \ + .vendor = X86_VENDOR_##_vendor, \ + .family = _family, \ + .model = _model, \ + .steppings = _steppings, \ + .feature = _feature, \ + .driver_data = (unsigned long) _data \ +} + extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); #endif diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 01ec126c5969..7c7fc5006017 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -270,6 +270,7 @@ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ @@ -308,10 +309,10 @@ #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ - /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ @@ -343,5 +344,6 @@ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index fe884e18fa6e..c7854a098b6b 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h @@ -73,7 +73,7 @@ #define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT) /* 4GB broken PCI/AGP hardware bus master zone */ -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) +#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT)) #ifdef CONFIG_X86_32 /* The maximum address that we can perform a DMA transfer to on this platform */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 66a5e60f60c4..4fb38927128c 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -217,6 +217,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) } } +static inline void fxsave(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); + else + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); +} + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" @@ -290,28 +298,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (static_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) { u64 mask = -1; diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 5a51fcbbe563..6db02d52cdf4 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -198,6 +198,21 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. + * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. + */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 4ad6560847b1..53238f0da79e 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -52,7 +52,7 @@ static inline void local_sub(long i, local_t *l) */ static inline int local_sub_and_test(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e); } /** @@ -65,7 +65,7 @@ static inline int local_sub_and_test(long i, local_t *l) */ static inline int local_dec_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e); } /** @@ -78,7 +78,7 @@ static inline int local_dec_and_test(local_t *l) */ static inline int local_inc_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e); } /** @@ -92,7 +92,7 @@ static inline int local_inc_and_test(local_t *l) */ static inline int local_add_negative(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 854a20efa771..4ee65ec8c29c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -91,6 +91,10 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ +/* SRBDS support */ +#define MSR_IA32_MCU_OPT_CTRL 0x00000123 +#define RNGDS_MITG_DIS BIT(0) + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 783f0711895b..2f84887e8934 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -178,6 +178,7 @@ enum spectre_v2_mitigation { enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_STRICT_PREFERRED, SPECTRE_V2_USER_PRCTL, SPECTRE_V2_USER_SECCOMP, }; @@ -274,7 +275,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ -static inline void mds_clear_cpu_buffers(void) +static __always_inline void mds_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -295,7 +296,7 @@ static inline void mds_clear_cpu_buffers(void) * * Clear CPU buffers if the corresponding static key is enabled */ -static inline void mds_user_clear_cpu_buffers(void) +static __always_inline void mds_user_clear_cpu_buffers(void) { if (static_branch_likely(&mds_user_clear)) mds_clear_cpu_buffers(); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c759b3cca663..b4c5099cafee 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -938,13 +938,6 @@ extern void default_banner(void); push %ecx; push %edx; \ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ pop %edx; pop %ecx - -#define ENABLE_INTERRUPTS_SYSEXIT \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ - CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) - - #else /* !CONFIG_X86_32 */ /* diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 3d44191185f8..cc0e5a666c9e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -162,15 +162,6 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); -#ifdef CONFIG_X86_32 - /* - * Atomically enable interrupts and return to userspace. This - * is only used in 32-bit kernels. 64-bit kernels use - * usergs_sysret32 instead. - */ - void (*irq_enable_sysexit)(void); -#endif - /* * Switch to usermode gs and return to 64-bit usermode using * sysret. Only used in 64-bit kernels to return to 64-bit diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index f5e780bfa2b3..66cd0c862a80 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -534,7 +534,7 @@ static inline int x86_this_cpu_variable_test_bit(int nr, { int oldbit; - asm volatile("bt "__percpu_arg(2)",%1\n\t" + asm volatile("btl "__percpu_arg(2)",%1\n\t" "sbb %0,%0" : "=r" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 01bcde84d3e4..ad6661ca315d 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -81,7 +81,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); + GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); } /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 048942d53988..55163bc810db 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -88,7 +88,7 @@ struct cpuinfo_x86 { __u8 x86; /* CPU family */ __u8 x86_vendor; /* CPU vendor */ __u8 x86_model; - __u8 x86_mask; + __u8 x86_stepping; #ifdef CONFIG_X86_32 char wp_works_ok; /* It doesn't on 386's */ diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index a4a77286cb1d..ae6f1592530b 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -3,6 +3,8 @@ #include <asm/ldt.h> +struct task_struct; + /* misc architecture specific prototypes */ void syscall_init(void); diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 8f7866a5b9a4..cb0dce0273c8 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -5,7 +5,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ @@ -24,7 +24,7 @@ cc_label: \ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ char c; \ - asm volatile (fullop "; set" cc " %1" \ + asm volatile (fullop "; set" #cc " %1" \ : "+m" (var), "=qm" (c) \ : __VA_ARGS__ : "memory"); \ return c != 0; \ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 58505f01962f..743bd2d77e51 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -54,8 +54,13 @@ /* * Initialize the stackprotector canary value. * - * NOTE: this must only be called from functions that never return, + * NOTE: this must only be called from functions that never return * and it must always be inlined. + * + * In addition, it should be called from a compilation unit for which + * stack protector is disabled. Alternatively, the caller should not end + * with a function call which gets tail-call optimized as that would + * lead to checking a modified canary value. */ static __always_inline void boot_init_stack_canary(void) { diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 6136d99f537b..c1adb2ed6d41 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -108,6 +108,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) + #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 8dab88b85785..33a594f728de 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -245,12 +245,15 @@ static inline void __native_flush_tlb_single(unsigned long addr) * ASID. But, userspace flushes are probably much more * important performance-wise. * - * Make sure to do only a single invpcid when KAISER is - * disabled and we have only a single ASID. + * In the KAISER disabled case, do an INVLPG to make sure + * the mapping is flushed in case it is a global one. */ - if (kaiser_enabled) + if (kaiser_enabled) { invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); - invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); + invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); + } else { + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + } } static inline void __flush_tlb_all(void) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index c986d0b3bc35..df9ee8d768bf 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -105,7 +105,7 @@ int amd_cache_northbridges(void) if (boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model >= 0x8 && (boot_cpu_data.x86_model > 0x9 || - boot_cpu_data.x86_mask >= 0x1)) + boot_cpu_data.x86_stepping >= 0x1)) amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; if (boot_cpu_data.x86 == 0x15) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4dcf71c26d64..f53849f3f7fb 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -41,6 +41,7 @@ #include <asm/x86_init.h> #include <asm/pgalloc.h> #include <linux/atomic.h> +#include <asm/barrier.h> #include <asm/mpspec.h> #include <asm/i8259.h> #include <asm/proto.h> @@ -464,6 +465,9 @@ static int lapic_next_deadline(unsigned long delta, { u64 tsc; + /* This MSR is special and need a special fence: */ + weak_wrmsr_fence(); + tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); return 0; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5e8fc9809da3..497ad354e123 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1040,6 +1040,16 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; legacy = mp_is_legacy_irq(irq); + /* + * IRQ2 is unusable for historical reasons on systems which + * have a legacy PIC. See the comment vs. IRQ2 further down. + * + * If this gets removed at some point then the related code + * in lapic_assign_system_vectors() needs to be adjusted as + * well. + */ + if (legacy && irq == PIC_CASCADE_IR) + return -EINVAL; } mutex_lock(&ioapic_mutex); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index cc8311c4d298..f474756fc151 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -32,7 +32,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long flags; u32 dest; - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); local_irq_save(flags); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 662e9150ea6f..ad7c3544b07f 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -43,7 +43,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long this_cpu; unsigned long flags; - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); local_irq_save(flags); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 4a139465f1d4..7554075414d4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -648,9 +648,9 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) l = li; } addr1 = (base << shift) + - f * (unsigned long)(1 << m_io); + f * (1ULL << m_io); addr2 = (base << shift) + - (l + 1) * (unsigned long)(1 << m_io); + (l + 1) * (1ULL << m_io); pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n", id, fi, li, lnasid, addr1, addr2); if (max_io < l) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 052c9c3026cc..dfdbe01ef9f2 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -239,6 +239,7 @@ #include <asm/olpc.h> #include <asm/paravirt.h> #include <asm/reboot.h> +#include <asm/nospec-branch.h> #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -613,11 +614,13 @@ static long __apm_bios_call(void *_call) gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); + firmware_restrict_branch_speculation_start(); APM_DO_SAVE_SEGS; apm_bios_call_asm(call->func, call->ebx, call->ecx, &call->eax, &call->ebx, &call->ecx, &call->edx, &call->esi); APM_DO_RESTORE_SEGS; + firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); @@ -689,10 +692,12 @@ static long __apm_bios_call_simple(void *_call) gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); + firmware_restrict_branch_speculation_start(); APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, &call->eax); APM_DO_RESTORE_SEGS; + firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 439df975bc7a..84a7524b202c 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -65,9 +65,6 @@ void common(void) { OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); -#ifdef CONFIG_X86_32 - OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); -#endif OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); #endif diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 6ce39025f467..f876141e380c 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -20,7 +20,7 @@ void foo(void) OFFSET(CPUINFO_x86, cpuinfo_x86, x86); OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); - OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); + OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping); OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 424d8a636615..b8fbe983277b 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -112,7 +112,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } - if (c->x86_model == 6 && c->x86_mask == 1) { + if (c->x86_model == 6 && c->x86_stepping == 1) { const int K6_BUG_LOOP = 1000000; int n; void (*f_vide)(void); @@ -142,7 +142,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) /* K6 with old style WHCR */ if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { + (c->x86_model == 8 && c->x86_stepping < 8)) { /* We can only write allocate on the low 508Mb */ if (mbytes > 508) mbytes = 508; @@ -161,7 +161,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } - if ((c->x86_model == 8 && c->x86_mask > 7) || + if ((c->x86_model == 8 && c->x86_stepping > 7) || c->x86_model == 9 || c->x86_model == 13) { /* The more serious chips .. */ @@ -214,7 +214,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx * As per AMD technical note 27212 0.2 */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { printk(KERN_INFO @@ -235,12 +235,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * but they are not certified as MP capable. */ /* Athlon 660/661 is valid. */ - if ((c->x86_model == 6) && ((c->x86_mask == 0) || - (c->x86_mask == 1))) + if ((c->x86_model == 6) && ((c->x86_stepping == 0) || + (c->x86_stepping == 1))) return; /* Duron 670 is valid */ - if ((c->x86_model == 7) && (c->x86_mask == 0)) + if ((c->x86_model == 7) && (c->x86_stepping == 0)) return; /* @@ -250,8 +250,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c) * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for * more. */ - if (((c->x86_model == 6) && (c->x86_mask >= 2)) || - ((c->x86_model == 7) && (c->x86_mask >= 1)) || + if (((c->x86_model == 6) && (c->x86_stepping >= 2)) || + ((c->x86_model == 7) && (c->x86_stepping >= 1)) || (c->x86_model > 7)) if (cpu_has(c, X86_FEATURE_MP)) return; @@ -563,7 +563,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) /* Set MTRR capability flag if appropriate */ if (c->x86 == 5) if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) + (c->x86_model == 8 && c->x86_stepping >= 8)) set_cpu_cap(c, X86_FEATURE_K6_MTRR); #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) @@ -902,11 +902,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) /* AMD errata T13 (order #21922) */ if ((c->x86 == 6)) { /* Duron Rev A0 */ - if (c->x86_model == 3 && c->x86_mask == 0) + if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; /* Tbird rev A1/A2 */ if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) + (c->x86_stepping == 0 || c->x86_stepping == 1)) size = 256; } return size; @@ -1043,7 +1043,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) } /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 4) | cpu->x86_mask; + ms = (cpu->x86_model << 4) | cpu->x86_stepping; while ((range = *erratum++)) if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && (ms >= AMD_MODEL_RANGE_START(range)) && diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 950e6bb21955..ffc3bc8111b8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -39,6 +39,7 @@ static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); static void __init mds_print_mitigation(void); static void __init taa_select_mitigation(void); +static void __init srbds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -58,7 +59,7 @@ static u64 x86_spec_ctrl_mask = SPEC_CTRL_IBRS; u64 x86_amd_ls_cfg_base; u64 x86_amd_ls_cfg_ssbd_mask; -/* Control conditional STIPB in switch_to() */ +/* Control conditional STIBP in switch_to() */ DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); /* Control conditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); @@ -99,6 +100,7 @@ void __init check_bugs(void) l1tf_select_mitigation(); mds_select_mitigation(); taa_select_mitigation(); + srbds_select_mitigation(); /* * As MDS and TAA mitigations are inter-related, print MDS @@ -369,6 +371,97 @@ static int __init tsx_async_abort_parse_cmdline(char *str) early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "SRBDS: " fmt + +enum srbds_mitigations { + SRBDS_MITIGATION_OFF, + SRBDS_MITIGATION_UCODE_NEEDED, + SRBDS_MITIGATION_FULL, + SRBDS_MITIGATION_TSX_OFF, + SRBDS_MITIGATION_HYPERVISOR, +}; + +static enum srbds_mitigations srbds_mitigation = SRBDS_MITIGATION_FULL; + +static const char * const srbds_strings[] = { + [SRBDS_MITIGATION_OFF] = "Vulnerable", + [SRBDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [SRBDS_MITIGATION_FULL] = "Mitigation: Microcode", + [SRBDS_MITIGATION_TSX_OFF] = "Mitigation: TSX disabled", + [SRBDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status", +}; + +static bool srbds_off; + +void update_srbds_msr(void) +{ + u64 mcu_ctrl; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return; + + if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED) + return; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + + switch (srbds_mitigation) { + case SRBDS_MITIGATION_OFF: + case SRBDS_MITIGATION_TSX_OFF: + mcu_ctrl |= RNGDS_MITG_DIS; + break; + case SRBDS_MITIGATION_FULL: + mcu_ctrl &= ~RNGDS_MITG_DIS; + break; + default: + break; + } + + wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); +} + +static void __init srbds_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return; + + /* + * Check to see if this is one of the MDS_NO systems supporting + * TSX that are only exposed to SRBDS when TSX is enabled. + */ + ia32_cap = x86_read_arch_cap_msr(); + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM)) + srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; + else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; + else if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL)) + srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED; + else if (cpu_mitigations_off() || srbds_off) + srbds_mitigation = SRBDS_MITIGATION_OFF; + + update_srbds_msr(); + pr_info("%s\n", srbds_strings[srbds_mitigation]); +} + +static int __init srbds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return 0; + + srbds_off = !strcmp(str, "off"); + return 0; +} +early_param("srbds", srbds_parse_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt enum spectre_v1_mitigation { @@ -465,7 +558,8 @@ early_param("nospectre_v1", nospectre_v1_cmdline); static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; -static enum spectre_v2_user_mitigation spectre_v2_user = SPECTRE_V2_USER_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user_stibp = SPECTRE_V2_USER_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user_ibpb = SPECTRE_V2_USER_NONE; #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -516,10 +610,11 @@ enum spectre_v2_user_cmd { }; static const char * const spectre_v2_user_strings[] = { - [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", - [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", - [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", - [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", + [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", + [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", + [SPECTRE_V2_USER_STRICT_PREFERRED] = "User space: Mitigation: STIBP always-on protection", + [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", + [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", }; static const struct { @@ -612,11 +707,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + spectre_v2_user_ibpb = mode; switch (cmd) { case SPECTRE_V2_USER_CMD_FORCE: case SPECTRE_V2_USER_CMD_PRCTL_IBPB: case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); + spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: case SPECTRE_V2_USER_CMD_AUTO: @@ -632,21 +729,32 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) "always-on" : "conditional"); } - /* If enhanced IBRS is enabled no STIPB required */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + /* + * If enhanced IBRS is enabled or SMT impossible, STIBP is not + * required. + */ + if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; /* - * If SMT is not possible or STIBP is not available clear the STIPB - * mode. + * At this point, an STIBP mode other than "off" has been set. + * If STIBP support is not being forced, check if STIBP always-on + * is preferred. */ - if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + if (mode != SPECTRE_V2_USER_STRICT && + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + + /* + * If STIBP is not available, clear the STIBP mode. + */ + if (!boot_cpu_has(X86_FEATURE_STIBP)) mode = SPECTRE_V2_USER_NONE; + + spectre_v2_user_stibp = mode; + set_mode: - spectre_v2_user = mode; - /* Only print the STIBP mode when SMT possible */ - if (smt_possible) - pr_info("%s\n", spectre_v2_user_strings[mode]); + pr_info("%s\n", spectre_v2_user_strings[mode]); } static const char * const spectre_v2_strings[] = { @@ -886,10 +994,11 @@ void arch_smt_update(void) { mutex_lock(&spec_ctrl_mutex); - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; case SPECTRE_V2_USER_STRICT: + case SPECTRE_V2_USER_STRICT_PREFERRED: update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: @@ -1114,18 +1223,41 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static bool is_spec_ib_user_controlled(void) +{ + return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP; +} + static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { case PR_SPEC_ENABLE: - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; + /* - * Indirect branch speculation is always disabled in strict - * mode. + * With strict mode for both IBPB and STIBP, the instruction + * code paths avoid checking this task flag and instead, + * unconditionally run the instruction. However, STIBP and IBPB + * are independent and either can be set to conditionally + * enabled regardless of the mode of the other. + * + * If either is set to conditional, allow the task flag to be + * updated, unless it was force-disabled by a previous prctl + * call. Currently, this is possible on an AMD CPU which has the + * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the + * kernel is booted with 'spectre_v2_user=seccomp', then + * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and + * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED. */ - if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + if (!is_spec_ib_user_controlled() || + task_spec_ib_force_disable(task)) return -EPERM; + task_clear_spec_ib_disable(task); task_update_spec_tif(task); break; @@ -1135,10 +1267,13 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) * Indirect branch speculation is always allowed when * mitigation is force disabled. */ - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + + if (!is_spec_ib_user_controlled()) return 0; + task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); @@ -1168,7 +1303,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); - if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -1197,21 +1333,21 @@ static int ib_prctl_get(struct task_struct *task) if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return PR_SPEC_NOT_AFFECTED; - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + else if (is_spec_ib_user_controlled()) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - case SPECTRE_V2_USER_STRICT: + } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) return PR_SPEC_DISABLE; - default: + else return PR_SPEC_NOT_AFFECTED; - } } int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) @@ -1352,11 +1488,13 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; + case SPECTRE_V2_USER_STRICT_PREFERRED: + return ", STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) @@ -1377,6 +1515,11 @@ static char *ibpb_state(void) return ""; } +static ssize_t srbds_show_state(char *buf) +{ + return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -1418,6 +1561,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITLB_MULTIHIT: return itlb_multihit_show_state(buf); + case X86_BUG_SRBDS: + return srbds_show_state(buf); + default: break; } @@ -1464,4 +1610,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr { return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); } + +ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); +} #endif diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 6608c03c2126..cf761e640797 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -134,7 +134,7 @@ static void init_centaur(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_TSC); break; case 8: - switch (c->x86_mask) { + switch (c->x86_stepping) { default: name = "2"; break; @@ -209,7 +209,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) * - Note, it seems this may only be in engineering samples. */ if ((c->x86 == 6) && (c->x86_model == 9) && - (c->x86_mask == 1) && (size == 65)) + (c->x86_stepping == 1) && (size == 65)) size -= 1; return size; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e8fa12c7ad5b..32567a5bb8d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -652,7 +652,7 @@ void cpu_detect(struct cpuinfo_x86 *c) cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 0xf; c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; + c->x86_stepping = tfms & 0xf; if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; @@ -912,9 +912,30 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { {} }; -static bool __init cpu_matches(unsigned long which) +#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, steppings, \ + X86_FEATURE_ANY, issues) + +#define SRBDS BIT(0) + +static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0xC), SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0xD), SRBDS), + {} +}; + +static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which) { - const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); + const struct x86_cpu_id *m = x86_match_cpu(table); return m && !!(m->driver_data & which); } @@ -934,29 +955,32 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) u64 ia32_cap = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ - if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && + !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); - if (cpu_matches(NO_SPECULATION)) + if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && + if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && + !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { + if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && + !(ia32_cap & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); - if (cpu_matches(MSBDS_ONLY)) + if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); } - if (!cpu_matches(NO_SWAPGS)) + if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS)) setup_force_cpu_bug(X86_BUG_SWAPGS); /* @@ -974,7 +998,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); - if (cpu_matches(NO_MELTDOWN)) + /* + * SRBDS affects CPUs which support RDRAND or RDSEED and are listed + * in the vulnerability blacklist. + */ + if ((cpu_has(c, X86_FEATURE_RDRAND) || + cpu_has(c, X86_FEATURE_RDSEED)) && + cpu_matches(cpu_vuln_blacklist, SRBDS)) + setup_force_cpu_bug(X86_BUG_SRBDS); + + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ @@ -983,7 +1016,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - if (cpu_matches(NO_L1TF)) + if (cpu_matches(cpu_vuln_whitelist, NO_L1TF)) return; setup_force_cpu_bug(X86_BUG_L1TF); @@ -1157,7 +1190,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->loops_per_jiffy = loops_per_jiffy; c->x86_cache_size = 0; c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; @@ -1327,6 +1360,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #endif mtrr_ap_init(); x86_spec_ctrl_setup_ap(); + update_srbds_msr(); } struct msr_range { @@ -1403,8 +1437,8 @@ void print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model); - if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask); + if (c->x86_stepping || c->cpuid_level >= 0) + pr_cont(", stepping: 0x%x)\n", c->x86_stepping); else printk(KERN_CONT ")\n"); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index c42cc1acd668..f2eca5632f7d 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -64,6 +64,7 @@ extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); extern void x86_spec_ctrl_setup_ap(void); +extern void update_srbds_msr(void); extern u64 x86_read_arch_cap_msr(void); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 151625a83d9e..bc90e879998c 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -212,7 +212,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) /* common case step number/rev -- exceptions handled below */ c->x86_model = (dir1 >> 4) + 1; - c->x86_mask = dir1 & 0xf; + c->x86_stepping = dir1 & 0xf; /* Now cook; the original recipe is by Channing Corn, from Cyrix. * We do the same thing for each generation: we work out diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 7beef3da5904..cb73d16d540c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -80,7 +80,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && - c->x86_mask == spectre_bad_microcodes[i].stepping) + c->x86_stepping == spectre_bad_microcodes[i].stepping) return (c->microcode <= spectre_bad_microcodes[i].microcode); } return false; @@ -130,7 +130,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ - if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && + if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && c->microcode < 0x20e) { printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); @@ -146,7 +146,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* CPUID workaround for 0F33/0F34 CPU */ if (c->x86 == 0xF && c->x86_model == 0x3 - && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) + && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) c->x86_phys_bits = 36; /* @@ -246,8 +246,8 @@ int ppro_with_ram_bug(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); + boot_cpu_data.x86_stepping < 8) { + pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; } return 0; @@ -263,7 +263,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c) * Mask B, Pentium, but not Pentium MMX */ if (c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_stepping >= 1 && c->x86_stepping <= 4 && c->x86_model <= 3) { /* * Remember we have B step Pentia with bugs @@ -306,7 +306,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until * model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) clear_cpu_cap(c, X86_FEATURE_SEP); /* @@ -324,7 +324,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { @@ -340,7 +340,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * Specification Update"). */ if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) set_cpu_bug(c, X86_BUG_11AP); @@ -555,7 +555,7 @@ static void init_intel(struct cpuinfo_x86 *c) case 6: if (l2 == 128) p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) + else if (c->x86_stepping == 0 || c->x86_stepping == 5) p = "Celeron-A"; break; diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index fbb5e90557a5..a207aaaf78db 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -33,13 +33,18 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) const struct x86_cpu_id *m; struct cpuinfo_x86 *c = &boot_cpu_data; - for (m = match; m->vendor | m->family | m->model | m->feature; m++) { + for (m = match; + m->vendor | m->family | m->model | m->steppings | m->feature; + m++) { if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) continue; if (m->family != X86_FAMILY_ANY && c->x86 != m->family) continue; if (m->model != X86_MODEL_ANY && c->x86_model != m->model) continue; + if (m->steppings != X86_STEPPING_ANY && + !(BIT(c->x86_stepping) & m->steppings)) + continue; if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) continue; return m; diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index afaf648386e9..d4c3a30a7b33 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -132,51 +132,6 @@ load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, } } -/* - * Given CPU signature and a microcode patch, this function finds if the - * microcode patch has matching family and model with the CPU. - */ -static enum ucode_state -matching_model_microcode(struct microcode_header_intel *mc_header, - unsigned long sig) -{ - unsigned int fam, model; - unsigned int fam_ucode, model_ucode; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - unsigned long data_size = get_datasize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - - fam = __x86_family(sig); - model = x86_model(sig); - - fam_ucode = __x86_family(mc_header->sig); - model_ucode = x86_model(mc_header->sig); - - if (fam == fam_ucode && model == model_ucode) - return UCODE_OK; - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - return UCODE_NFOUND; - - ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - ext_sigcount = ext_header->count; - - for (i = 0; i < ext_sigcount; i++) { - fam_ucode = __x86_family(ext_sig->sig); - model_ucode = x86_model(ext_sig->sig); - - if (fam == fam_ucode && model == model_ucode) - return UCODE_OK; - - ext_sig++; - } - return UCODE_NFOUND; -} - static int save_microcode(struct mc_saved_data *mc_saved_data, struct microcode_intel **mc_saved_src, @@ -321,8 +276,8 @@ get_matching_model_microcode(int cpu, unsigned long start, * the platform, we need to find and save microcode patches * with the same family and model as the BSP. */ - if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != - UCODE_OK) { + if (!find_matching_signature(mc_header, uci->cpu_sig.sig, + uci->cpu_sig.pf)) { ucode_ptr += mc_size; continue; } @@ -1013,7 +968,7 @@ static bool is_blacklisted(unsigned int cpu) */ if (c->x86 == 6 && c->x86_model == 79 && - c->x86_mask == 0x01 && + c->x86_stepping == 0x01 && llc_size_per_core > 2621440 && c->microcode < 0x0b000021) { pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); @@ -1036,7 +991,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, return UCODE_NFOUND; sprintf(name, "intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); + c->x86, c->x86_model, c->x86_stepping); if (request_firmware_direct(&firmware, name, device)) { pr_debug("data file %s load failed\n", name); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 136ae86f4f5f..e2fa0fcbaa69 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -166,9 +166,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, *repeat = 0; *uniform = 1; - /* Make end inclusive instead of exclusive */ - end--; - prev_match = MTRR_TYPE_INVALID; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -260,6 +257,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) int repeat; u64 partial_end; + /* Make end inclusive instead of exclusive */ + end--; + if (!mtrr_state_set) return MTRR_TYPE_INVALID; @@ -860,7 +860,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, */ if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask <= 7) { + boot_cpu_data.x86_stepping <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 49bd700d9b7f..be3050f23536 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -699,8 +699,8 @@ void __init mtrr_bp_init(void) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 0x3 && - (boot_cpu_data.x86_mask == 0x3 || - boot_cpu_data.x86_mask == 0x4)) + (boot_cpu_data.x86_stepping == 0x3 || + boot_cpu_data.x86_stepping == 0x4)) phys_addr = 36; size_or_mask = SIZE_OR_MASK_BITS(phys_addr); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fbf2edc3eb35..851fbdb99767 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1550,6 +1550,7 @@ static void __init filter_events(struct attribute **attrs) { struct device_attribute *d; struct perf_pmu_events_attr *pmu_attr; + int offset = 0; int i, j; for (i = 0; attrs[i]; i++) { @@ -1558,7 +1559,7 @@ static void __init filter_events(struct attribute **attrs) /* str trumps id */ if (pmu_attr->event_str) continue; - if (x86_pmu.event_map(i)) + if (x86_pmu.event_map(i + offset)) continue; for (j = i; attrs[j]; j++) @@ -1566,6 +1567,14 @@ static void __init filter_events(struct attribute **attrs) /* Check the shifted attr. */ i--; + + /* + * event_map() is index based, the attrs array is organized + * by increasing event index. If we shift the events, then + * we need to compensate for the event_map(), otherwise + * we are looking up the wrong event in the map + */ + offset++; } } @@ -1992,6 +2001,7 @@ static int x86_pmu_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); + event->destroy = NULL; } if (ACCESS_ONCE(x86_pmu.attr_rdpmc)) diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c index 97242a9242bd..ec0bfbab7265 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c +++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c @@ -80,12 +80,12 @@ static struct attribute_group amd_iommu_format_group = { * sysfs events attributes *---------------------------------------------*/ struct amd_iommu_event_desc { - struct kobj_attribute attr; + struct device_attribute attr; const char *event; }; -static ssize_t _iommu_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t _iommu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct amd_iommu_event_desc *event = container_of(attr, struct amd_iommu_event_desc, attr); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3572434a73cb..d973c079e97c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1937,7 +1937,8 @@ intel_bts_constraints(struct perf_event *event) static int intel_alt_er(int idx, u64 config) { - int alt_idx; + int alt_idx = idx; + if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1)) return idx; @@ -3051,7 +3052,7 @@ static int intel_snb_pebs_broken(int cpu) break; case 45: /* SNB-EP */ - switch (cpu_data(cpu).x86_mask) { + switch (cpu_data(cpu).x86_stepping) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 2cdae69d7e0b..09058ad9816c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -1017,7 +1017,7 @@ void __init intel_pmu_lbr_init_atom(void) * on PMU interrupt */ if (boot_cpu_data.x86_model == 28 - && boot_cpu_data.x86_mask < 10) { + && boot_cpu_data.x86_stepping < 10) { pr_cont("LBR disabled due to erratum"); return; } diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 7c1a0c07b607..507e2e319f52 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -233,7 +233,7 @@ static __initconst const struct x86_pmu p6_pmu = { static __init void p6_pmu_rdpmc_quirk(void) { - if (boot_cpu_data.x86_mask < 9) { + if (boot_cpu_data.x86_stepping < 9) { /* * PPro erratum 26; fixed in stepping 9 and above. */ diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 935225c0375f..c4f772d3f35c 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -70,8 +70,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) c->x86_model, c->x86_model_id[0] ? c->x86_model_id : "unknown"); - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); + if (c->x86_stepping || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_stepping); else seq_puts(m, "stepping\t: unknown\n"); if (c->microcode) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 2c1910f6717e..a6d623e43d62 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -23,6 +23,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/overflow.h> #include <asm/processor.h> #include <asm/hardirq.h> @@ -572,7 +573,7 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) struct crash_memmap_data cmd; struct crash_mem *cmem; - cmem = vzalloc(sizeof(struct crash_mem)); + cmem = vzalloc(struct_size(cmem, ranges, 1)); if (!cmem) return -ENOMEM; diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 31fad2cbd734..9a1489b92782 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -262,15 +262,23 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) return 0; } - if (!access_ok(VERIFY_READ, buf, size)) + if (!access_ok(VERIFY_READ, buf, size)) { + fpu__clear(fpu); return -EACCES; + } fpu__activate_curr(fpu); - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; + if (!static_cpu_has(X86_FEATURE_FPU)) { + int ret = fpregs_soft_set(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), + NULL, buf); + + if (ret) + fpu__clear(fpu); + + return ret != 0; + } if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; @@ -317,10 +325,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } + local_bh_disable(); fpu->fpstate_active = 1; - preempt_disable(); fpu__restore(fpu); - preempt_enable(); + local_bh_enable(); return err; } else { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 3fa200ecca62..1ff1adbc843b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -293,12 +293,31 @@ static void __init setup_xstate_comp(void) } /* + * All supported features have either init state all zeros or are + * handled in setup_init_fpu() individually. This is an explicit + * feature list and does not use XFEATURE_MASK*SUPPORTED to catch + * newly added supported features at build time and make people + * actually look at the init state for the new feature. + */ +#define XFEATURES_INIT_FPSTATE_HANDLED \ + (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR) + +/* * setup the xstate image representing the init state */ static void __init setup_init_fpu_buf(void) { static int on_boot_cpu = 1; + BUILD_BUG_ON(XCNTXT_MASK != XFEATURES_INIT_FPSTATE_HANDLED); + WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -319,10 +338,22 @@ static void __init setup_init_fpu_buf(void) copy_kernel_to_xregs_booting(&init_fpstate.xsave); /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. + * All components are now in init state. Read the state back so + * that init_fpstate contains all non-zero init state. This only + * works with XSAVE, but not with XSAVEOPT and XSAVES because + * those use the init optimization which skips writing data for + * components in init state. + * + * XSAVE could be used, but that would require to reshuffle the + * data when XSAVES is available because XSAVES uses xstate + * compaction. But doing so is a pointless exercise because most + * components have an all zeros init state except for the legacy + * ones (FP and SSE). Those can be saved with FXSAVE into the + * legacy area. Adding new features requires to ensure that init + * state is all zeroes or if not to add the necessary handling + * here. */ - copy_xregs_to_kernel_booting(&init_fpstate.xsave); + fxsave(&init_fpstate.fxsave); } static int xfeature_is_supervisor(int xfeature_nr) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 1c0b49fd6365..10139b8f1e53 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -34,7 +34,7 @@ #define X86 new_cpu_data+CPUINFO_x86 #define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor #define X86_MODEL new_cpu_data+CPUINFO_x86_model -#define X86_MASK new_cpu_data+CPUINFO_x86_mask +#define X86_STEPPING new_cpu_data+CPUINFO_x86_stepping #define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math #define X86_CPUID new_cpu_data+CPUINFO_cpuid_level #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability @@ -440,7 +440,7 @@ enable_paging: shrb $4,%al movb %al,X86_MODEL andb $0x0f,%cl # mask mask revision - movb %cl,X86_MASK + movb %cl,X86_STEPPING movl %edx,X86_CAPABILITY is486: diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 4e3b8a587c88..01a1ab8483ac 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -204,7 +204,7 @@ spurious_8259A_irq: * lets ACK and report it. [once per IRQ] */ if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG + printk_deferred(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 1d41f50f3558..f6a27e6981df 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -211,8 +211,7 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; /* Copying screen_info will do? */ - memcpy(¶ms->screen_info, &boot_params.screen_info, - sizeof(struct screen_info)); + memcpy(¶ms->screen_info, &screen_info, sizeof(struct screen_info)); /* Fill in memsize later */ params->screen_info.ext_mem_k = 0; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 7c48aa03fe77..ebd4da00a56e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -750,6 +750,13 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; + /* + * Set a dummy kprobe for avoiding kretprobe recursion. + * Since kretprobe never run in kprobe handler, kprobe must not + * be running at this point. + */ + kprobe_busy_begin(); + INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ @@ -825,10 +832,9 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, NULL); + __this_cpu_write(current_kprobe, &kprobe_busy); } recycle_rp_inst(ri, &empty_rp); @@ -844,6 +850,8 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); + kprobe_busy_end(); + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); @@ -1006,6 +1014,11 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * So clear it by resetting the current kprobe: */ regs->flags &= ~X86_EFLAGS_TF; + /* + * Since the single step (trap) has been cancelled, + * we need to restore BTF here. + */ + restore_btf(); /* * If the TF flag was set before the kprobe hit, diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 94779f66bf49..6f0d340594ca 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -124,6 +124,7 @@ int apply_relocate(Elf32_Shdr *sechdrs, *location += sym->st_value; break; case R_386_PC32: + case R_386_PLT32: /* Add the value, subtract its position */ *location += sym->st_value - (uint32_t)location; break; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 30ca7607cbbb..c1cccfa900de 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -407,7 +407,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01; processor.cpuflag = CPU_ENABLED; processor.cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping; processor.featureflag = boot_cpu_data.x86_capability[0]; processor.reserved[0] = 0; processor.reserved[1] = 0; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 632195b41688..2cd05f34c0b6 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -168,9 +168,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_64(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || -#ifdef CONFIG_X86_32 - type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || -#endif type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) /* If operation requires a jmp, then jmp */ @@ -226,7 +223,6 @@ static u64 native_steal_clock(int cpu) /* These are in entry.S */ extern void native_iret(void); -extern void native_irq_enable_sysexit(void); extern void native_usergs_sysret32(void); extern void native_usergs_sysret64(void); @@ -385,9 +381,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .load_sp0 = native_load_sp0, -#if defined(CONFIG_X86_32) - .irq_enable_sysexit = native_irq_enable_sysexit, -#endif #ifdef CONFIG_X86_64 #ifdef CONFIG_IA32_EMULATION .usergs_sysret32 = native_usergs_sysret32, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index c89f50a76e97..158dc0650d5d 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_cpu_ops, iret, "iret"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); @@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, restore_fl); PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_cpu_ops, iret); - PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 0677bf8d3a42..03c6a8cf33c4 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -12,7 +12,6 @@ DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit"); DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 64090c943f05..46427f2955fa 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -333,28 +333,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, u64 msr = x86_spec_ctrl_base; bool updmsr = false; - /* - * If TIF_SSBD is different, select the proper mitigation - * method. Note that if SSBD mitigation is disabled or permanentely - * enabled this branch can't be taken because nothing can set - * TIF_SSBD. - */ - if (tif_diff & _TIF_SSBD) { - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + /* Handle change of TIF_SSBD depending on the mitigation method. */ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_ssb_virt_state(tifn); - } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_core_ssb_state(tifn); - } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - msr |= ssbd_tif_to_spec_ctrl(tifn); - updmsr = true; - } + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + updmsr |= !!(tif_diff & _TIF_SSBD); + msr |= ssbd_tif_to_spec_ctrl(tifn); } - /* - * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, - * otherwise avoid the MSR write. - */ + /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */ if (IS_ENABLED(CONFIG_SMP) && static_branch_unlikely(&switch_to_cond_stibp)) { updmsr |= !!(tif_diff & _TIF_SPEC_IB); diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h index 898e97cf6629..320ab978fb1f 100644 --- a/arch/x86/kernel/process.h +++ b/arch/x86/kernel/process.h @@ -19,7 +19,7 @@ static inline void switch_to_extra(struct task_struct *prev, if (IS_ENABLED(CONFIG_SMP)) { /* * Avoid __switch_to_xtra() invocation when conditional - * STIPB is disabled and the only different bit is + * STIBP is disabled and the only different bit is * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not * in the TIF_WORK_CTXSW masks. */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 618565fecb1c..1a79d451cd34 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -128,7 +128,7 @@ void release_thread(struct task_struct *dead_task) if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, - dead_task->mm->context.ldt, + dead_task->mm->context.ldt->entries, dead_task->mm->context.ldt->size); BUG(); } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9a16932c7258..143c06f84596 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -162,6 +162,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), }, }, + { /* Handle problems with rebooting on Apple MacBook6,1 */ + .callback = set_pci_reboot, + .ident = "Apple MacBook6,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"), + }, + }, { /* Handle problems with rebooting on Apple MacBookPro5 */ .callback = set_pci_reboot, .ident = "Apple MacBookPro5", @@ -329,10 +337,11 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, { /* Handle problems with rebooting on the OptiPlex 990. */ .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", + .ident = "Dell OptiPlex 990 BIOS A0x", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), + DMI_MATCH(DMI_BIOS_VERSION, "A0"), }, }, { /* Handle problems with rebooting on Dell 300's */ @@ -410,6 +419,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, }, + { /* PCIe Wifi card isn't detected after reboot otherwise */ + .callback = set_pci_reboot, + .ident = "Zotac ZBOX CI327 nano", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NA"), + DMI_MATCH(DMI_PRODUCT_NAME, "ZBOX-CI327NANO-GS-01"), + }, + }, + /* Sony */ { /* Handle problems with rebooting on Sony VGN-Z540N */ .callback = set_bios_reboot, @@ -471,29 +489,20 @@ static void emergency_vmx_disable_all(void) local_irq_disable(); /* - * We need to disable VMX on all CPUs before rebooting, otherwise - * we risk hanging up the machine, because the CPU ignore INIT - * signals when VMX is enabled. - * - * We can't take any locks and we may be on an inconsistent - * state, so we use NMIs as IPIs to tell the other CPUs to disable - * VMX and halt. + * Disable VMX on all CPUs before rebooting, otherwise we risk hanging + * the machine, because the CPU blocks INIT when it's in VMX root. * - * For safety, we will avoid running the nmi_shootdown_cpus() - * stuff unnecessarily, but we don't have a way to check - * if other CPUs have VMX enabled. So we will call it only if the - * CPU we are running on has VMX enabled. + * We can't take any locks and we may be on an inconsistent state, so + * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt. * - * We will miss cases where VMX is not enabled on all CPUs. This - * shouldn't do much harm because KVM always enable VMX on all - * CPUs anyway. But we can miss it on the small window where KVM - * is still enabling VMX. + * Do the NMI shootdown even if VMX if off on _this_ CPU, as that + * doesn't prevent a different CPU from being in VMX root operation. */ - if (cpu_has_vmx() && cpu_vmx_enabled()) { - /* Disable VMX on this CPU. */ - cpu_vmxoff(); + if (cpu_has_vmx()) { + /* Safely force _this_ CPU out of VMX root operation. */ + __cpu_emergency_vmxoff(); - /* Halt and disable VMX on the other CPUs */ + /* Halt and exit VMX root operation on the other CPUs. */ nmi_shootdown_cpus(vmxoff_nmi); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c017f1c71560..0512af683871 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -243,6 +243,14 @@ static void notrace start_secondary(void *unused) wmb(); cpu_startup_entry(CPUHP_ONLINE); + + /* + * Prevent tail call to cpu_startup_entry() because the stack protector + * guard has been changed a couple of function calls up, in + * boot_init_stack_canary() and must not be checked before tail calling + * another function. + */ + prevent_tail_call_optimization(); } void __init smp_store_boot_cpu_info(void) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 590c8fd2ed9b..700b8e857025 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -22,10 +22,6 @@ #include <asm/hpet.h> #include <asm/time.h> -#ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; -#endif - unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 8c38784cf992..60ccfa4c2768 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -268,10 +268,11 @@ static volatile u32 good_2byte_insns[256 / 32] = { static bool is_prefix_bad(struct insn *insn) { + insn_byte_t p; int i; - for (i = 0; i < insn->prefixes.nbytes; i++) { - switch (insn->prefixes.bytes[i]) { + for_each_insn_prefix(insn, i, p) { + switch (p) { case 0x26: /* INAT_PFX_ES */ case 0x2E: /* INAT_PFX_CS */ case 0x36: /* INAT_PFX_DS */ @@ -711,6 +712,7 @@ static struct uprobe_xol_ops branch_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); + insn_byte_t p; int i; switch (opc1) { @@ -741,8 +743,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. * No one uses these insns, reject any branch insns with such prefix. */ - for (i = 0; i < insn->prefixes.nbytes; i++) { - if (insn->prefixes.bytes[i] == 0x66) + for_each_insn_prefix(insn, i, p) { + if (p == 0x66) return -ENOTSUPP; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 17e1e60b6b40..b05da220ea0a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -34,13 +34,13 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) #ifdef CONFIG_X86_32 OUTPUT_ARCH(i386) ENTRY(phys_startup_32) -jiffies = jiffies_64; #else OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) -jiffies_64 = jiffies; #endif +jiffies = jiffies_64; + #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) /* * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 13bda3fcf42b..b60ffd1b3ae2 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -611,8 +611,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); unsigned phys_as = entry->eax & 0xff; - if (!g_phys_as) + /* + * Use bare metal's MAXPHADDR if the CPU doesn't report guest + * MAXPHYADDR separately, or if TDP (NPT) is disabled, as the + * guest version "applies only to guests using nested paging". + */ + if (!g_phys_as || !tdp_enabled) g_phys_as = phys_as; + entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; /* diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 466028623e1a..827d54a5126e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3524,7 +3524,7 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt) u64 tsc_aux = 0; if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) - return emulate_gp(ctxt, 0); + return emulate_ud(ctxt); ctxt->dst.val = tsc_aux; return X86EMUL_CONTINUE; } @@ -3922,6 +3922,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_clflushopt(struct x86_emulate_ctxt *ctxt) +{ + /* emulating clflushopt regardless of cpuid */ + return X86EMUL_CONTINUE; +} + static int em_movsxd(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = (s32) ctxt->src.val; @@ -4411,7 +4417,7 @@ static const struct opcode group11[] = { }; static const struct gprefix pfx_0f_ae_7 = { - I(SrcMem | ByteOp, em_clflush), N, N, N, + I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N, }; static const struct group_dual group15 = { { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a5b533aea958..2ff0fe32c015 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3679,7 +3679,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd | - nonleaf_bit8_rsvd | gbpages_bit_rsvd | + gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51); diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c index 822829f00590..04890ac518d0 100644 --- a/arch/x86/kvm/pmu_intel.c +++ b/arch/x86/kvm/pmu_intel.c @@ -29,7 +29,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = { [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, - [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES }, + [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES }, }; /* mapping between fixed pmc index and intel_arch_events array */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9fc536657492..77bee73faebc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2250,7 +2250,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr dst->iopm_base_pa = from->iopm_base_pa; dst->msrpm_base_pa = from->msrpm_base_pa; dst->tsc_offset = from->tsc_offset; - dst->asid = from->asid; + /* asid not copied, it is handled manually for svm->vmcb. */ dst->tlb_ctl = from->tlb_ctl; dst->int_ctl = from->int_ctl; dst->int_vector = from->int_vector; @@ -2564,7 +2564,11 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->nested.intercept = nested_vmcb->control.intercept; svm_flush_tlb(&svm->vcpu); - svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; + svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl & + (V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK); + + svm->vmcb->control.int_ctl |= V_INTR_MASKING_MASK; + if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) svm->vcpu.arch.hflags |= HF_VINTR_MASK; else @@ -2927,7 +2931,7 @@ static int cr_interception(struct vcpu_svm *svm) err = 0; if (cr >= 16) { /* mov to cr */ cr -= 16; - val = kvm_register_read(&svm->vcpu, reg); + val = kvm_register_readl(&svm->vcpu, reg); switch (cr) { case 0: if (!check_selective_cr0_intercepted(svm, val)) @@ -2972,7 +2976,7 @@ static int cr_interception(struct vcpu_svm *svm) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } - kvm_register_write(&svm->vcpu, reg, val); + kvm_register_writel(&svm->vcpu, reg, val); } kvm_complete_insn_gp(&svm->vcpu, err); @@ -3004,13 +3008,13 @@ static int dr_interception(struct vcpu_svm *svm) if (dr >= 16) { /* mov to DRn */ if (!kvm_require_dr(&svm->vcpu, dr - 16)) return 1; - val = kvm_register_read(&svm->vcpu, reg); + val = kvm_register_readl(&svm->vcpu, reg); kvm_set_dr(&svm->vcpu, dr - 16, val); } else { if (!kvm_require_dr(&svm->vcpu, dr)) return 1; kvm_get_dr(&svm->vcpu, dr, &val); - kvm_register_write(&svm->vcpu, reg, val); + kvm_register_writel(&svm->vcpu, reg, val); } skip_emulated_instruction(&svm->vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2634b4556202..6646edaa5123 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5441,7 +5441,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, */ static void kvm_machine_check(void) { -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) +#if defined(CONFIG_X86_MCE) struct pt_regs regs = { .cs = 3, /* Fake ring 3 no matter what the guest ran on */ .flags = X86_EFLAGS_IF, @@ -7844,7 +7844,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return true; } - switch (exit_reason) { + switch ((u16)exit_reason) { case EXIT_REASON_EXCEPTION_NMI: if (is_nmi(intr_info)) return false; @@ -8235,6 +8235,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) (exit_reason != EXIT_REASON_EXCEPTION_NMI && exit_reason != EXIT_REASON_EPT_VIOLATION && exit_reason != EXIT_REASON_PML_FULL && + exit_reason != EXIT_REASON_APIC_ACCESS && exit_reason != EXIT_REASON_TASK_SWITCH)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3adc255e69cb..8dce61ca934b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2162,7 +2162,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_mtrr_set_msr(vcpu, msr, data); case MSR_IA32_APICBASE: return kvm_set_apic_base(vcpu, msr_info); - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_write(vcpu, msr, data); case MSR_IA32_TSCDEADLINE: kvm_set_lapic_tscdeadline_msr(vcpu, data); @@ -2172,6 +2172,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; adjust_tsc_offset_guest(vcpu, adj); + /* Before back to guest, tsc_timestamp must be adjusted + * as well, otherwise guest's percpu pvclock time could jump. + */ + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); } vcpu->arch.ia32_tsc_adjust_msr = data; } @@ -2432,7 +2436,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_APICBASE: msr_info->data = kvm_get_apic_base(vcpu); break; - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); break; case MSR_IA32_TSCDEADLINE: @@ -2941,7 +2945,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, unsigned bank_num = mcg_cap & 0xff, bank; r = -EINVAL; - if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) + if (!bank_num || bank_num > KVM_MAX_MCE_BANKS) goto out; if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) goto out; @@ -4013,10 +4017,13 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&u.ps, argp, sizeof u.ps)) goto out; + mutex_lock(&kvm->lock); r = -ENXIO; if (!kvm->arch.vpit) - goto out; + goto set_pit_out; r = kvm_vm_ioctl_set_pit(kvm, &u.ps); +set_pit_out: + mutex_unlock(&kvm->lock); break; } case KVM_GET_PIT2: { @@ -4036,10 +4043,13 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&u.ps2, argp, sizeof(u.ps2))) goto out; + mutex_lock(&kvm->lock); r = -ENXIO; if (!kvm->arch.vpit) - goto out; + goto set_pit2_out; r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2); +set_pit2_out: + mutex_unlock(&kvm->lock); break; } case KVM_REINJECT_CONTROL: { @@ -6010,6 +6020,7 @@ void kvm_arch_exit(void) unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block); #ifdef CONFIG_X86_64 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); + cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops = NULL; kvm_mmu_module_exit(); @@ -6717,6 +6728,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[3], 3); set_debugreg(vcpu->arch.dr6, 6); vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; + } else if (unlikely(hw_breakpoint_active())) { + set_debugreg(0, 7); } kvm_x86_ops->run(vcpu); diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c index 518532e6a3fa..8a3bc242c5e9 100644 --- a/arch/x86/lib/msr-smp.c +++ b/arch/x86/lib/msr-smp.c @@ -239,7 +239,7 @@ static void __wrmsr_safe_regs_on_cpu(void *info) rv->err = wrmsr_safe_regs(rv->regs); } -int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { int err; struct msr_regs_info rv; @@ -252,7 +252,7 @@ int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) } EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); -int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { int err; struct msr_regs_info rv; diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S index d258f59564e1..3b40c98bbbd4 100644 --- a/arch/x86/math-emu/wm_sqrt.S +++ b/arch/x86/math-emu/wm_sqrt.S @@ -208,7 +208,7 @@ sqrt_stage_2_finish: #ifdef PARANOID /* It should be possible to get here only if the arg is ffff....ffff */ - cmp $0xffffffff,FPU_fsqrt_arg_1 + cmpl $0xffffffff,FPU_fsqrt_arg_1 jnz sqrt_stage_2_error #endif /* PARANOID */ diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0f1c6fc3ddd8..47770ccab6d7 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -15,6 +15,7 @@ #include <linux/debugfs.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/sched.h> #include <linux/seq_file.h> #include <asm/pgtable.h> @@ -407,6 +408,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, } else note_page(m, &st, __pgprot(0), 1); + cond_resched(); start++; } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f00eb52c16a6..17eb564901ca 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -109,8 +109,6 @@ __ref void *alloc_low_pages(unsigned int num) } else { pfn = pgt_buf_end; pgt_buf_end += num; - printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n", - pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1); } for (i = 0; i < num; i++) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d76ec9348cff..547d80fc76d7 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1182,21 +1182,21 @@ int kern_addr_valid(unsigned long addr) return 0; pud = pud_offset(pgd, addr); - if (pud_none(*pud)) + if (!pud_present(*pud)) return 0; if (pud_large(*pud)) return pfn_valid(pud_pfn(*pud)); pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + if (!pmd_present(*pmd)) return 0; if (pmd_large(*pmd)) return pfn_valid(pmd_pfn(*pmd)); pte = pte_offset_kernel(pmd, addr); - if (pte_none(*pte)) + if (!pte_present(*pte)) return 0; return pfn_valid(pte_pfn(*pte)); diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 0057a7accfb1..5448ad4d0703 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -385,7 +385,7 @@ static void enter_uniprocessor(void) int cpu; int err; - if (downed_cpus == NULL && + if (!cpumask_available(downed_cpus) && !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { pr_notice("Failed to allocate mask\n"); goto out; @@ -415,7 +415,7 @@ static void leave_uniprocessor(void) int cpu; int err; - if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) + if (!cpumask_available(downed_cpus) || cpumask_weight(downed_cpus) == 0) return; pr_notice("Re-enabling CPUs...\n"); for_each_cpu(cpu, downed_cpus) { diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3ed4753280aa..289518bb0e8d 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -720,6 +720,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) } free_page((unsigned long)pmd_sv); + + pgtable_pmd_page_dtor(virt_to_page(pmd)); free_page((unsigned long)pmd); return 1; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index dd9a861fd526..82f8cd0a3af9 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -145,6 +145,19 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_9)); } +/* + * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64 + * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte + * of encoding. al,cl,dl,bl have simpler encoding. + */ +static bool is_ereg_8l(u32 reg) +{ + return is_ereg(reg) || + (1 << reg) & (BIT(BPF_REG_1) | + BIT(BPF_REG_2) | + BIT(BPF_REG_FP)); +} + /* add modifiers if 'reg' maps to x64 registers r8..r15 */ static u8 add_1mod(u8 byte, u32 reg) { @@ -731,9 +744,8 @@ st: if (is_imm8(insn->off)) /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: /* emit 'mov byte ptr [rax + off], al' */ - if (is_ereg(dst_reg) || is_ereg(src_reg) || - /* have to add extra byte for x86 SIL, DIL regs */ - src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + if (is_ereg(dst_reg) || is_ereg_8l(src_reg)) + /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); else EMIT1(0x88); @@ -1026,7 +1038,16 @@ common_load: } if (image) { - if (unlikely(proglen + ilen > oldproglen)) { + /* + * When populating the image, assert that: + * + * i) We do not write beyond the allocated space, and + * ii) addrs[i] did not change from the prior run, in order + * to validate assumptions made for computing branch + * displacements. + */ + if (unlikely(proglen + ilen > oldproglen || + proglen + ilen != addrs[i])) { pr_err("bpf_jit_compile fatal error\n"); return -EFAULT; } diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index fd1ab80be0de..a4cf678cf5c8 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -10,6 +10,7 @@ BEGIN { /^GNU objdump/ { verstr = "" + gsub(/\(.*\)/, ""); for (i = 3; i <= NF; i++) if (match($(i), "^[0-9]")) { verstr = $(i); diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 5b6c8486a0be..d1c3f82c7882 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -839,9 +839,11 @@ static int do_reloc32(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, case R_386_PC32: case R_386_PC16: case R_386_PC8: + case R_386_PLT32: /* - * NONE can be ignored and PC relative relocations don't - * need to be adjusted. + * NONE can be ignored and PC relative relocations don't need + * to be adjusted. Because sym must be defined, R_386_PLT32 can + * be treated the same way as R_386_PC32. */ break; @@ -882,9 +884,11 @@ static int do_reloc_real(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, case R_386_PC32: case R_386_PC16: case R_386_PC8: + case R_386_PLT32: /* - * NONE can be ignored and PC relative relocations don't - * need to be adjusted. + * NONE can be ignored and PC relative relocations don't need + * to be adjusted. Because sym must be defined, R_386_PLT32 can + * be treated the same way as R_386_PC32. */ break; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 82fd84d5e1aa..28725a6ed5de 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -861,8 +861,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) preempt_enable(); } -static void xen_convert_trap_info(const struct desc_ptr *desc, - struct trap_info *traps) +static unsigned xen_convert_trap_info(const struct desc_ptr *desc, + struct trap_info *traps, bool full) { unsigned in, out, count; @@ -872,17 +872,18 @@ static void xen_convert_trap_info(const struct desc_ptr *desc, for (in = out = 0; in < count; in++) { gate_desc *entry = (gate_desc*)(desc->address) + in; - if (cvt_gate_to_trap(in, entry, &traps[out])) + if (cvt_gate_to_trap(in, entry, &traps[out]) || full) out++; } - traps[out].address = 0; + + return out; } void xen_copy_trap_info(struct trap_info *traps) { const struct desc_ptr *desc = this_cpu_ptr(&idt_desc); - xen_convert_trap_info(desc, traps); + xen_convert_trap_info(desc, traps, true); } /* Load a new IDT into Xen. In principle this can be per-CPU, so we @@ -892,6 +893,7 @@ static void xen_load_idt(const struct desc_ptr *desc) { static DEFINE_SPINLOCK(lock); static struct trap_info traps[257]; + unsigned out; trace_xen_cpu_load_idt(desc); @@ -899,7 +901,8 @@ static void xen_load_idt(const struct desc_ptr *desc) memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); - xen_convert_trap_info(desc, traps); + out = xen_convert_trap_info(desc, traps, false); + memset(&traps[out], 0, sizeof(traps[0])); xen_mc_flush(); if (HYPERVISOR_set_trap_table(traps)) @@ -1240,10 +1243,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .iret = xen_iret, #ifdef CONFIG_X86_64 - .usergs_sysret32 = xen_sysret32, .usergs_sysret64 = xen_sysret64, -#else - .irq_enable_sysexit = xen_sysexit, #endif .load_tr_desc = paravirt_nop, diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index cab9f766bb06..af0ebe18248a 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -623,8 +623,8 @@ int xen_alloc_p2m_entry(unsigned long pfn) } /* Expanded the p2m? */ - if (pfn > xen_p2m_last_pfn) { - xen_p2m_last_pfn = pfn; + if (pfn >= xen_p2m_last_pfn) { + xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE); HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; } @@ -723,9 +723,12 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, for (i = 0; i < count; i++) { unsigned long mfn, pfn; + struct gnttab_unmap_grant_ref unmap[2]; + int rc; /* Do not add to override if the map failed. */ - if (map_ops[i].status) + if (map_ops[i].status != GNTST_okay || + (kmap_ops && kmap_ops[i].status != GNTST_okay)) continue; if (map_ops[i].flags & GNTMAP_contains_pte) { @@ -739,10 +742,46 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned"); - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { - ret = -ENOMEM; - goto out; + if (likely(set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) + continue; + + /* + * Signal an error for this slot. This in turn requires + * immediate unmapping. + */ + map_ops[i].status = GNTST_general_error; + unmap[0].host_addr = map_ops[i].host_addr, + unmap[0].handle = map_ops[i].handle; + map_ops[i].handle = ~0; + if (map_ops[i].flags & GNTMAP_device_map) + unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr; + else + unmap[0].dev_bus_addr = 0; + + if (kmap_ops) { + kmap_ops[i].status = GNTST_general_error; + unmap[1].host_addr = kmap_ops[i].host_addr, + unmap[1].handle = kmap_ops[i].handle; + kmap_ops[i].handle = ~0; + if (kmap_ops[i].flags & GNTMAP_device_map) + unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr; + else + unmap[1].dev_bus_addr = 0; } + + /* + * Pre-populate both status fields, to be recognizable in + * the log message below. + */ + unmap[0].status = 1; + unmap[1].status = 1; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + unmap, 1 + !!kmap_ops); + if (rc || unmap[0].status != GNTST_okay || + unmap[1].status != GNTST_okay) + pr_err_once("gnttab unmap failed: rc=%d st0=%d st1=%d\n", + rc, unmap[0].status, unmap[1].status); } out: @@ -763,17 +802,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); unsigned long pfn = page_to_pfn(pages[i]); - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { + if (mfn != INVALID_P2M_ENTRY && (mfn & FOREIGN_FRAME_BIT)) + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + else ret = -EINVAL; - goto out; - } - - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } if (kunmap_ops) ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, - kunmap_ops, count); -out: + kunmap_ops, count) ?: ret; + return ret; } EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 29e50d1229bc..ee48506ca151 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -116,6 +116,7 @@ asmlinkage __visible void cpu_bringup_and_idle(int cpu) #endif cpu_bringup(); cpu_startup_entry(CPUHP_ONLINE); + prevent_tail_call_optimization(); } static void xen_smp_intr_free(unsigned int cpu) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 85872a08994a..e9fc0f7df0da 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -301,10 +301,20 @@ void xen_init_lock_cpu(int cpu) void xen_uninit_lock_cpu(int cpu) { + int irq; + if (!xen_pvspin) return; - unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); + /* + * When booting the kernel with 'mitigations=auto,nosmt', the secondary + * CPUs are not activated, and lock_kicker_irq is not initialized. + */ + irq = per_cpu(lock_kicker_irq, cpu); + if (irq == -1) + return; + + unbind_from_irqhandler(irq, NULL); per_cpu(lock_kicker_irq, cpu) = -1; kfree(per_cpu(irq_name, cpu)); per_cpu(irq_name, cpu) = NULL; diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index fd92a64d748e..feb6d40a0860 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -35,20 +35,6 @@ check_events: ret /* - * We can't use sysexit directly, because we're not running in ring0. - * But we can easily fake it up using iret. Assuming xen_sysexit is - * jumped to with a standard stack frame, we can just strip it back to - * a standard iret frame and use iret. - */ -ENTRY(xen_sysexit) - movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ - orl $X86_EFLAGS_IF, PT_EFLAGS(%esp) - lea PT_EIP(%esp), %esp - - jmp xen_iret -ENDPROC(xen_sysexit) - -/* * This is run where a normal iret would be run, with the same stack setup: * 8: eflags * 4: cs diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 1399423f3418..4140b070f2e9 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -139,9 +139,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); -#ifdef CONFIG_X86_32 -__visible void xen_sysexit(void); -#endif __visible void xen_sysret32(void); __visible void xen_sysret64(void); __visible void xen_adjust_exception_frame(void); |