diff options
Diffstat (limited to 'arch/x86/include')
34 files changed, 439 insertions, 147 deletions
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2188b5af8167..f39fd349cef6 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -50,7 +50,7 @@ static inline void generic_apic_probe(void) #ifdef CONFIG_X86_LOCAL_APIC -extern unsigned int apic_verbosity; +extern int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 14635c5ea025..305c6eed9141 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -49,7 +49,7 @@ static __always_inline void atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) - : "ir" (i)); + : "ir" (i) : "memory"); } /** @@ -63,7 +63,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) - : "ir" (i)); + : "ir" (i) : "memory"); } /** @@ -89,7 +89,7 @@ static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) static __always_inline void atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" - : "+m" (v->counter)); + : "+m" (v->counter) :: "memory"); } /** @@ -101,7 +101,7 @@ static __always_inline void atomic_inc(atomic_t *v) static __always_inline void atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" - : "+m" (v->counter)); + : "+m" (v->counter) :: "memory"); } /** @@ -249,19 +249,6 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) return c; } -/** - * atomic_inc_short - increment of a short integer - * @v: pointer to type int - * - * Atomically adds 1 to @v - * Returns the new value of @u - */ -static __always_inline short int atomic_inc_short(short int *v) -{ - asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); - return *v; -} - #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 89ed2f6ae2f7..a3248402c36b 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -44,7 +44,7 @@ static __always_inline void atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) - : "er" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter) : "memory"); } /** @@ -58,7 +58,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) - : "er" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter) : "memory"); } /** @@ -85,7 +85,7 @@ static __always_inline void atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) - : "m" (v->counter)); + : "m" (v->counter) : "memory"); } /** @@ -98,7 +98,7 @@ static __always_inline void atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) - : "m" (v->counter)); + : "m" (v->counter) : "memory"); } /** diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index eb53c2c78a1f..a0f450b21d67 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -105,8 +105,8 @@ do { \ #endif /* Atomic operations are already serializing on x86 */ -#define __smp_mb__before_atomic() barrier() -#define __smp_mb__after_atomic() barrier() +#define __smp_mb__before_atomic() do { } while (0) +#define __smp_mb__after_atomic() do { } while (0) #include <asm-generic/barrier.h> diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 4a8cb8d7cbd5..588d8fbd1e6d 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -17,6 +17,20 @@ * Note: efi_info is commonly left uninitialized, but that field has a * private magic, so it is better to leave it unchanged. */ + +#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); }) + +#define BOOT_PARAM_PRESERVE(struct_member) \ + { \ + .start = offsetof(struct boot_params, struct_member), \ + .len = sizeof_mbr(struct boot_params, struct_member), \ + } + +struct boot_params_to_save { + unsigned int start; + unsigned int len; +}; + static void sanitize_boot_params(struct boot_params *boot_params) { /* @@ -35,19 +49,40 @@ static void sanitize_boot_params(struct boot_params *boot_params) */ if (boot_params->sentinel) { /* fields in boot_params are left uninitialized, clear them */ - memset(&boot_params->ext_ramdisk_image, 0, - (char *)&boot_params->efi_info - - (char *)&boot_params->ext_ramdisk_image); - memset(&boot_params->kbd_status, 0, - (char *)&boot_params->hdr - - (char *)&boot_params->kbd_status); - memset(&boot_params->_pad7[0], 0, - (char *)&boot_params->edd_mbr_sig_buffer[0] - - (char *)&boot_params->_pad7[0]); - memset(&boot_params->_pad8[0], 0, - (char *)&boot_params->eddbuf[0] - - (char *)&boot_params->_pad8[0]); - memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9)); + static struct boot_params scratch; + char *bp_base = (char *)boot_params; + char *save_base = (char *)&scratch; + int i; + + const struct boot_params_to_save to_save[] = { + BOOT_PARAM_PRESERVE(screen_info), + BOOT_PARAM_PRESERVE(apm_bios_info), + BOOT_PARAM_PRESERVE(tboot_addr), + BOOT_PARAM_PRESERVE(ist_info), + BOOT_PARAM_PRESERVE(hd0_info), + BOOT_PARAM_PRESERVE(hd1_info), + BOOT_PARAM_PRESERVE(sys_desc_table), + BOOT_PARAM_PRESERVE(olpc_ofw_header), + BOOT_PARAM_PRESERVE(efi_info), + BOOT_PARAM_PRESERVE(alt_mem_k), + BOOT_PARAM_PRESERVE(scratch), + BOOT_PARAM_PRESERVE(e820_entries), + BOOT_PARAM_PRESERVE(eddbuf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), + BOOT_PARAM_PRESERVE(hdr), + BOOT_PARAM_PRESERVE(e820_map), + BOOT_PARAM_PRESERVE(eddbuf), + }; + + memset(&scratch, 0, sizeof(scratch)); + + for (i = 0; i < ARRAY_SIZE(to_save); i++) { + memcpy(save_base + to_save[i].start, + bp_base + to_save[i].start, to_save[i].len); + } + + memcpy(boot_params, save_base, sizeof(*boot_params)); } } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 98444b77fbe3..fb457ba8ccc6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -192,7 +192,8 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ - +#define X86_FEATURE_FENCE_SWAPGS_USER ( 7*32+10) /* "" LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL ( 7*32+11) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ @@ -201,9 +202,6 @@ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ -#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ @@ -214,6 +212,7 @@ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ +#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -271,10 +270,12 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -304,6 +305,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ @@ -315,6 +317,7 @@ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ @@ -352,5 +355,10 @@ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index f498411f2500..1b15304dd098 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_CRASH_H #define _ASM_X86_CRASH_H +struct kimage; + int crash_load_segments(struct kimage *image); int crash_copy_backup_region(struct kimage *image); int crash_setup_memmap_entries(struct kimage *image, diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 8554f960e21b..61d6f2c05757 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -142,7 +142,7 @@ extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index b3e32b010ab1..c2c01f84df75 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +#define POP_SS_OPCODE 0x1f +#define MOV_SREG_OPCODE 0x8e + +/* + * Intel SDM Vol.3A 6.8.3 states; + * "Any single-step trap that would be delivered following the MOV to SS + * instruction or POP to SS instruction (because EFLAGS.TF is 1) is + * suppressed." + * This function returns true if @insn is MOV SS or POP SS. On these + * instructions, single stepping is suppressed. + */ +static inline int insn_masking_exception(struct insn *insn) +{ + return insn->opcode.bytes[0] == POP_SS_OPCODE || + (insn->opcode.bytes[0] == MOV_SREG_OPCODE && + X86_MODRM_REG(insn->modrm.bytes[0]) == 2); +} + #endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 75b748a1deb8..74ee597beb3e 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -5,7 +5,7 @@ * "Big Core" Processors (Branded as Core, Xeon, etc...) * * The "_X" parts are generally the EP and EX Xeons, or the - * "Extreme" ones, like Broadwell-E. + * "Extreme" ones, like Broadwell-E, or Atom microserver. * * Things ending in "2" are usually because we have no better * name for them. There's no processor called "SILVERMONT2". @@ -50,19 +50,24 @@ /* "Small Core" Processors (Atom) */ -#define INTEL_FAM6_ATOM_PINEVIEW 0x1C -#define INTEL_FAM6_ATOM_LINCROFT 0x26 -#define INTEL_FAM6_ATOM_PENWELL 0x27 -#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35 -#define INTEL_FAM6_ATOM_CEDARVIEW 0x36 -#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ -#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ -#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ -#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ -#define INTEL_FAM6_ATOM_GOLDMONT 0x5C -#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ -#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A +#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ +#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ + +#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */ +#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */ +#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ + +#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ +#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */ +#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ + +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ +#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ + +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ +#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ +#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ +#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 508a062e6cf1..0c8f4281b151 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -5,6 +5,8 @@ #ifndef __ASSEMBLY__ +#include <asm/nospec-branch.h> + /* Provide __cpuidle; we can't safely include <linux/cpu.h> */ #define __cpuidle __attribute__((__section__(".cpuidle.text"))) @@ -53,11 +55,13 @@ static inline void native_irq_enable(void) static inline __cpuidle void native_safe_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static inline __cpuidle void native_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 282630e4c6ea..1624a7ffa95d 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -66,7 +66,7 @@ struct kimage; /* Memory to backup during crash kdump */ #define KEXEC_BACKUP_SRC_START (0UL) -#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */ +#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ /* * CPU does not save ss and sp on stack if execution is already diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9a8167b175d5..d2c14a96ec28 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -261,6 +261,7 @@ struct kvm_rmap_head { struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head lpage_disallowed_link; /* * The following two entries are used to key the shadow page in the @@ -273,6 +274,7 @@ struct kvm_mmu_page { /* hold the gfn of each spte inside spt */ gfn_t *gfns; bool unsync; + bool lpage_disallowed; /* Can't be replaced by an equiv large page */ int root_count; /* Currently serving as active root */ unsigned int unsync_children; struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ @@ -487,6 +489,7 @@ struct kvm_vcpu_arch { bool tpr_access_reporting; u64 ia32_xss; u64 microcode_version; + u64 arch_capabilities; /* * Paging state of the vcpu @@ -723,6 +726,7 @@ struct kvm_arch { */ struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; + struct list_head lpage_disallowed_mmu_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; @@ -797,6 +801,8 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + + struct task_struct *nx_lpage_recovery_thread; }; struct kvm_vm_stat { @@ -810,6 +816,7 @@ struct kvm_vm_stat { ulong mmu_unsync; ulong remote_tlb_flush; ulong lpages; + ulong nx_lpage_splits; }; struct kvm_vcpu_stat { @@ -1308,25 +1315,29 @@ enum { #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) +asmlinkage void __noreturn kvm_spurious_fault(void); + /* * Hardware virtualization extension instructions may fault if a * reboot turns off virtualization while processes are running. - * Trap the fault and ignore the instruction if that happens. + * Usually after catching the fault we just panic; during reboot + * instead the instruction is ignored. */ -asmlinkage void kvm_spurious_fault(void); - -#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ - "666: " insn "\n\t" \ - "668: \n\t" \ - ".pushsection .fixup, \"ax\" \n" \ - "667: \n\t" \ - cleanup_insn "\n\t" \ - "cmpb $0, kvm_rebooting \n\t" \ - "jne 668b \n\t" \ - __ASM_SIZE(push) " $666b \n\t" \ - "jmp kvm_spurious_fault \n\t" \ - ".popsection \n\t" \ - _ASM_EXTABLE(666b, 667b) +#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ + "666: \n\t" \ + insn "\n\t" \ + "jmp 668f \n\t" \ + "667: \n\t" \ + "call kvm_spurious_fault \n\t" \ + "668: \n\t" \ + ".pushsection .fixup, \"ax\" \n\t" \ + "700: \n\t" \ + cleanup_insn "\n\t" \ + "cmpb $0, kvm_rebooting\n\t" \ + "je 667b \n\t" \ + "jmp 668b \n\t" \ + ".popsection \n\t" \ + _ASM_EXTABLE(666b, 700b) #define __kvm_handle_fault_on_reboot(insn) \ ____kvm_handle_fault_on_reboot(insn, "") diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 5e69154c9f07..c8e472e2c896 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -52,6 +52,21 @@ struct extended_sigtable { #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) +static inline u32 intel_get_microcode_revision(void) +{ + u32 rev, dummy; + + native_wrmsrl(MSR_IA32_UCODE_REV, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + native_cpuid_eax(1); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); + + return rev; +} + extern int has_newer_microcode(void *mc, unsigned int csig, int cpf, int rev); extern int microcode_sanity_check(void *mc, int print_err); extern int find_matching_signature(void *mc, unsigned int csig, int cpf); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9963e21ac443..8d162e0f2881 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H +#include <linux/bits.h> + /* * CPU model specific register (MSR) numbers. * @@ -38,13 +40,14 @@ /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ -#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 @@ -61,24 +64,45 @@ #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ -#define ARCH_CAP_SSB_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass - * attack, so no Speculative Store Bypass - * control required. - */ +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ +#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* + * The processor is not susceptible to a + * machine check error due to modifying the + * code page size along with either the + * physical address or cache type + * without TLB invalidation. + */ +#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ +#define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to + * TSX Async Abort (TAA) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b -#define L1D_FLUSH (1 << 0) /* - * Writeback and invalidate the - * L1 data cache. - */ +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e +#define MSR_IA32_TSX_CTRL 0x00000122 +#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 @@ -305,6 +329,7 @@ #define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index f37f2d8a2989..58b1b766e84e 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <asm/cpufeature.h> +#include <asm/nospec-branch.h> #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf @@ -18,7 +19,7 @@ #define MWAIT_ECX_INTERRUPT_BREAK 0x1 #define MWAITX_ECX_TIMER_ENABLE BIT(1) #define MWAITX_MAX_LOOPS ((u32)-1) -#define MWAITX_DISABLE_CSTATES 0xf +#define MWAITX_DISABLE_CSTATES 0xf0 static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) @@ -38,6 +39,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx, static inline void __mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + /* "mwait %eax, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); @@ -72,6 +75,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void __mwaitx(unsigned long eax, unsigned long ebx, unsigned long ecx) { + /* No MDS buffer clear as this is AMD/HYGON only */ + /* "mwaitx %eax, %ebx, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xfb;" :: "a" (eax), "b" (ebx), "c" (ecx)); @@ -79,6 +84,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + trace_hardirqs_on(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1b4132161c1f..8d56d701b5f7 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -3,6 +3,8 @@ #ifndef _ASM_X86_NOSPEC_BRANCH_H_ #define _ASM_X86_NOSPEC_BRANCH_H_ +#include <linux/static_key.h> + #include <asm/alternative.h> #include <asm/alternative-asm.h> #include <asm/cpufeatures.h> @@ -194,7 +196,7 @@ " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ - "903: addl $4, %%esp;\n" \ + "903: lea 4(%%esp), %%esp;\n" \ " pushl %[thunk_target];\n" \ " ret;\n" \ " .align 16\n" \ @@ -214,10 +216,17 @@ enum spectre_v2_mitigation { SPECTRE_V2_RETPOLINE_MINIMAL_AMD, SPECTRE_V2_RETPOLINE_GENERIC, SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS, SPECTRE_V2_IBRS_ENHANCED, }; +/* The indirect branch speculation control variants */ +enum spectre_v2_user_mitigation { + SPECTRE_V2_USER_NONE, + SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_PRCTL, + SPECTRE_V2_USER_SECCOMP, +}; + /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, @@ -295,6 +304,60 @@ do { \ preempt_enable(); \ } while (0) +DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + +DECLARE_STATIC_KEY_FALSE(mds_user_clear); +DECLARE_STATIC_KEY_FALSE(mds_idle_clear); + +#include <asm/segment.h> + +/** + * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the + * instruction is executed. + */ +static inline void mds_clear_cpu_buffers(void) +{ + static const u16 ds = __KERNEL_DS; + + /* + * Has to be the memory-operand variant because only that + * guarantees the CPU buffer flush functionality according to + * documentation. The register-operand variant does not. + * Works with any segment selector, but a valid writable + * data segment is the fastest variant. + * + * "cc" clobber is required because VERW modifies ZF. + */ + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); +} + +/** + * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_user_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_user_clear)) + mds_clear_cpu_buffers(); +} + +/** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_idle_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_idle_clear)) + mds_clear_cpu_buffers(); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index f353061bba1d..81d5ea71bbe9 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -200,16 +200,20 @@ struct x86_pmu_capability { #define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) #define IBSCTL_LVT_OFFSET_MASK 0x0F -/* ibs fetch bits/masks */ +/* IBS fetch bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) #define IBS_FETCH_CNT 0xFFFF0000ULL #define IBS_FETCH_MAX_CNT 0x0000FFFFULL -/* ibs op bits/masks */ -/* lower 4 bits of the current count are ignored: */ -#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32) +/* + * IBS op bits/masks + * The lower 7 bits of the current count are random bits + * preloaded by hardware and ignored in software + */ +#define IBS_OP_CUR_CNT (0xFFF80ULL<<32) +#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32) #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 221a32ed1372..f12e61e2a86b 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -44,15 +44,15 @@ struct mm_struct; void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); -static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) +static inline void native_set_pte(pte_t *ptep, pte_t pte) { - *ptep = native_make_pte(0); + WRITE_ONCE(*ptep, pte); } -static inline void native_set_pte(pte_t *ptep, pte_t pte) +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - *ptep = pte; + native_set_pte(ptep, native_make_pte(0)); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) @@ -62,7 +62,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); } static inline void native_pmd_clear(pmd_t *pmd) @@ -98,7 +98,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) static inline void native_set_pud(pud_t *pudp, pud_t pud) { - *pudp = pud; + WRITE_ONCE(*pudp, pud); } static inline void native_pud_clear(pud_t *pud) @@ -131,7 +131,7 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { - *pgdp = kaiser_set_shadow_pgd(pgdp, pgd); + WRITE_ONCE(*pgdp, kaiser_set_shadow_pgd(pgdp, pgd)); } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ee8c6290c421..7aa9a9bd9d98 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -213,6 +213,24 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, : "memory"); } +#define native_cpuid_reg(reg) \ +static inline unsigned int native_cpuid_##reg(unsigned int op) \ +{ \ + unsigned int eax = op, ebx, ecx = 0, edx; \ + \ + native_cpuid(&eax, &ebx, &ecx, &edx); \ + \ + return reg; \ +} + +/* + * Native CPUID functions returning a single datum. + */ +native_cpuid_reg(eax) +native_cpuid_reg(ebx) +native_cpuid_reg(ecx) +native_cpuid_reg(edx) + static inline void load_cr3(pgd_t *pgdir) { write_cr3(__pa(pgdir)); @@ -874,4 +892,17 @@ enum l1tf_mitigations { extern enum l1tf_mitigations l1tf_mitigation; +enum mds_mitigations { + MDS_MITIGATION_OFF, + MDS_MITIGATION_FULL, + MDS_MITIGATION_VMWERV, +}; + +enum taa_mitigations { + TAA_MITIGATION_OFF, + TAA_MITIGATION_UCODE_NEEDED, + TAA_MITIGATION_VERW, + TAA_MITIGATION_TSX_DISABLED, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 2b5d686ea9f3..fb489cd848fa 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -115,9 +115,9 @@ static inline int v8086_mode(struct pt_regs *regs) #endif } -#ifdef CONFIG_X86_64 static inline bool user_64bit_mode(struct pt_regs *regs) { +#ifdef CONFIG_X86_64 #ifndef CONFIG_PARAVIRT /* * On non-paravirt systems, this is the only long mode CPL 3 @@ -128,8 +128,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs) /* Headers are too twisted for this to go in paravirt.h. */ return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; #endif +#else /* !CONFIG_X86_64 */ + return false; +#endif } +#ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp #endif @@ -196,23 +200,51 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, } /** + * regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns the address of the @n th entry of the + * kernel stack which is specified by @regs. If the @n th entry is NOT in + * the kernel stack, this returns NULL. + */ +static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return addr; + else + return NULL; +} + +/* To avoid include hell, we can't include uaccess.h */ +extern long probe_kernel_read(void *dst, const void *src, size_t size); + +/** * regs_get_kernel_stack_nth() - get Nth entry of the stack * @regs: pt_regs which contains kernel stack pointer. * @n: stack entry number. * * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * is specified by @regs. If the @n th entry is NOT in the kernel stack * this returns 0. */ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) { - unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); - addr += n; - if (regs_within_kernel_stack(regs, (unsigned long)addr)) - return *addr; - else - return 0; + unsigned long *addr; + unsigned long val; + long ret; + + addr = regs_get_kernel_stack_nth_addr(regs, n); + if (addr) { + ret = probe_kernel_read(&val, addr, sizeof(val)); + if (!ret) + return val; + } + return 0; } #define arch_has_single_step() (1) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index d25fb6beb2f0..dcaf7100b69c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -177,16 +177,6 @@ extern int safe_smp_processor_id(void); #endif #ifdef CONFIG_X86_LOCAL_APIC - -#ifndef CONFIG_X86_64 -static inline int logical_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); -} - -#endif - extern int hard_smp_processor_id(void); #else /* CONFIG_X86_LOCAL_APIC */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index ae7c2c5cd7f0..5393babc0598 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline u64 stibp_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; @@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void); static inline void speculative_store_bypass_ht_init(void) { } #endif -extern void speculative_store_bypass_update(unsigned long tif); - -static inline void speculative_store_bypass_update_current(void) -{ - speculative_store_bypass_update(current_thread_info()->flags); -} +extern void speculation_ctrl_update(unsigned long tif); +extern void speculation_ctrl_update_current(void); #endif diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 37f2e0b377ad..4141ead86879 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -55,13 +55,16 @@ extern int kstack_depth_to_print; static inline unsigned long * get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { + struct inactive_task_frame *frame; + if (regs) return (unsigned long *)regs->bp; if (task == current) return __builtin_frame_address(0); - return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp; + frame = (struct inactive_task_frame *)task->thread.sp; + return (unsigned long *)READ_ONCE_NOCHECK(frame->bp); } #else static inline unsigned long * diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 8e9dbe7b73a1..5cc2ce4ab8a3 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -11,7 +11,13 @@ /* image of the saved processor state */ struct saved_context { - u16 es, fs, gs, ss; + /* + * On x86_32, all segment registers, with the possible exception of + * gs, are saved at kernel entry in pt_regs. + */ +#ifdef CONFIG_X86_32_LAZY_GS + u16 gs; +#endif unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; bool misc_enable_saved; diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 2bd96b4df140..701751918921 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -19,8 +19,20 @@ */ struct saved_context { struct pt_regs regs; - u16 ds, es, fs, gs, ss; - unsigned long gs_base, gs_kernel_base, fs_base; + + /* + * User CS and SS are saved in current_pt_regs(). The rest of the + * segment selectors need to be saved and restored here. + */ + u16 ds, es, fs, gs; + + /* + * Usermode FSBASE and GSBASE may not match the fs and gs selectors, + * so we save them separately. We save the kernelmode GSBASE to + * restore percpu access after resume. + */ + unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; + unsigned long cr0, cr2, cr3, cr4, cr8; u64 misc_enable; bool misc_enable_saved; @@ -29,8 +41,7 @@ struct saved_context { u16 gdt_pad; /* Unused */ struct desc_ptr gdt_desc; u16 idt_pad; - u16 idt_limit; - unsigned long idt_base; + struct desc_ptr idt; u16 ldt; u16 tss; unsigned long tr; diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 5cb436acd463..e959b8d40473 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -8,9 +8,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -struct tss_struct; -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss); /* This runs runs on the previous thread's stack. */ static inline void prepare_switch_to(struct task_struct *prev, @@ -38,6 +35,7 @@ asmlinkage void ret_from_fork(void); /* data that is pointed to by thread.sp */ struct inactive_task_frame { + unsigned long flags; #ifdef CONFIG_X86_64 unsigned long r15; unsigned long r14; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2d8788a59b4d..0438f7fbb383 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,10 +83,12 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_SSBD 5 /* Reduced data speculation */ +#define TIF_SSBD 5 /* Speculative store bypass disable */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -111,6 +113,8 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) +#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -140,8 +144,18 @@ struct thread_info { _TIF_NOHZ) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) +#define _TIF_WORK_CTXSW_BASE \ + (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP| \ + _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) + +/* + * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. + */ +#ifdef CONFIG_SMP +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB) +#else +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) +#endif #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 686a58d793e5..f5ca15622dc9 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -68,8 +68,12 @@ static inline void invpcid_flush_all_nonglobals(void) struct tlb_state { struct mm_struct *active_mm; int state; - /* last user mm's ctx id */ - u64 last_ctx_id; + + /* Last user mm for optimizing IBPB */ + union { + struct mm_struct *last_user_mm; + unsigned long last_user_mm_ibpb; + }; /* * Access to this CR4 shadow and to H/W CR4 is protected by diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 2177c7551ff7..9db8d8758ed3 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -438,8 +438,10 @@ do { \ ({ \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ + __typeof__(ptr) __gu_ptr = (ptr); \ + __typeof__(size) __gu_size = (size); \ __uaccess_begin_nospec(); \ - __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ + __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ __builtin_expect(__gu_err, 0); \ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index e728699db774..59e78c3d3dd8 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void) * works on all CPUs. This is volatile so that it orders * correctly wrt barrier() and to keep gcc from cleverly * hoisting it out of the calling function. + * + * If RDPID is available, use it. */ - asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + alternative_io ("lsl %[seg],%[p]", + ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ + X86_FEATURE_RDPID, + [p] "=a" (p), [seg] "r" (__PER_CPU_SEG)); return p; } diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index ccdc23d89b60..9f694537a103 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -216,6 +216,9 @@ privcmd_call(unsigned call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + if (call >= PAGE_SIZE / sizeof(hypercall_page[0])) + return -EINVAL; + stac(); asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index 3dec769cadf7..1c532b3f18ea 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -27,7 +27,6 @@ header-y += ldt.h header-y += mce.h header-y += mman.h header-y += msgbuf.h -header-y += msr-index.h header-y += msr.h header-y += mtrr.h header-y += param.h diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 69a6e07e3149..db7dae58745f 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -28,6 +28,8 @@ struct mce { __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + __u64 ppin; /* Protected Processor Inventory Number */ + __u32 microcode;/* Microcode revision */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) |