Diffstat (limited to 'arch/x86')
-rw-r--r--   arch/x86/entry/entry_64.S                |  13
-rw-r--r--   arch/x86/include/asm/asm.h               |   2
-rw-r--r--   arch/x86/include/asm/mmu.h               |  15
-rw-r--r--   arch/x86/include/asm/mmu_context.h       |   5
-rw-r--r--   arch/x86/include/asm/nospec-branch.h     |  37
-rw-r--r--   arch/x86/include/asm/tlbflush.h          |   2
-rw-r--r--   arch/x86/kernel/apic/vector.c            |  14
-rw-r--r--   arch/x86/kvm/svm.c                       |   9
-rw-r--r--   arch/x86/kvm/vmx.c                       |   9
-rw-r--r--   arch/x86/mm/tlb.c                        |  33
-rw-r--r--   arch/x86/net/bpf_jit_comp.c              |   9
-rw-r--r--   arch/x86/oprofile/nmi_int.c              |   2
-rw-r--r--   arch/x86/platform/intel-mid/intel-mid.c  |   2
-rw-r--r--   arch/x86/xen/suspend.c                   |  16
14 files changed, 145 insertions, 23 deletions
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index db5009ce065a..8d7e4d48db0d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -176,13 +176,26 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	pushq	%r8			/* pt_regs->r8 */
 	pushq	%r9			/* pt_regs->r9 */
 	pushq	%r10			/* pt_regs->r10 */
+	/*
+	 * Clear extra registers that a speculation attack might
+	 * otherwise want to exploit. Interleave XOR with PUSH
+	 * for better uop scheduling:
+	 */
+	xorq	%r10, %r10		/* nospec   r10 */
 	pushq	%r11			/* pt_regs->r11 */
+	xorq	%r11, %r11		/* nospec   r11 */
 	pushq	%rbx			/* pt_regs->rbx */
+	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq	%rbp			/* pt_regs->rbp */
+	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq	%r12			/* pt_regs->r12 */
+	xorq	%r12, %r12		/* nospec   r12 */
 	pushq	%r13			/* pt_regs->r13 */
+	xorq	%r13, %r13		/* nospec   r13 */
 	pushq	%r14			/* pt_regs->r14 */
+	xorq	%r14, %r14		/* nospec   r14 */
 	pushq	%r15			/* pt_regs->r15 */
+	xorq	%r15, %r15		/* nospec   r15 */
 
 	/* IRQs are off. */
 	movq	%rsp, %rdi
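A note on the XOR choices above: on x86-64, any write to a 32-bit register zero-extends into the full 64-bit register, which is why the hunk can clear %rbx and %rbp with the shorter xorl while using xorq for %r10-%r15. A minimal userspace sketch demonstrating the zero-extension (assumes GCC or Clang on x86-64; the variable name is illustrative):

    /* Shows that "xorl %ebx, %ebx" clears all 64 bits of %rbx. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t val = 0xdeadbeefcafef00dULL;

        /* Load a 64-bit pattern, then clear it with a 32-bit XOR. */
        asm volatile("movq %1, %%rbx\n\t"
                     "xorl %%ebx, %%ebx\n\t"  /* zero-extends to all of %rbx */
                     "movq %%rbx, %0"
                     : "=r" (val)
                     : "r" (val)
                     : "rbx");

        printf("rbx after xorl: %#llx\n", (unsigned long long)val);  /* prints 0 */
        return 0;
    }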
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7bb29a416b77..f35f246a26bf 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -128,6 +128,7 @@
 #endif
 
 #ifndef __ASSEMBLY__
+#ifndef __BPF__
 /*
  * This output constraint should be used for any inline asm which has a "call"
  * instruction. Otherwise the asm may be inserted before the frame pointer
@@ -137,5 +138,6 @@
 register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
+#endif
 
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 8b272a08d1a8..e2e09347ee3c 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,12 +3,18 @@
 
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
+#include <linux/atomic.h>
 
 /*
- * The x86 doesn't have a mmu context, but
- * we put the segment information here.
+ * x86 has arch-specific MMU state beyond what lives in mm_struct.
  */
 typedef struct {
+	/*
+	 * ctx_id uniquely identifies this mm_struct. A ctx_id will never
+	 * be reused, and zero is not a valid ctx_id.
+	 */
+	u64 ctx_id;
+
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
 	struct ldt_struct *ldt;
 #endif
@@ -33,6 +39,11 @@ typedef struct {
 #endif
 } mm_context_t;
 
+#define INIT_MM_CONTEXT(mm)						\
+	.context = {							\
+		.ctx_id = 1,						\
+	}
+
 void leave_mm(int cpu);
 
 #endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index d23e35584f15..5a295bb97103 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -12,6 +12,9 @@
 #include <asm/tlbflush.h>
 #include <asm/paravirt.h>
 #include <asm/mpx.h>
+
+extern atomic64_t last_mm_ctx_id;
+
 #ifndef CONFIG_PARAVIRT
 static inline void paravirt_activate_mm(struct mm_struct *prev,
 					struct mm_struct *next)
@@ -106,6 +109,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+
 	#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
 		/* pkey 0 is the default and always allocated */
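The mmu.h/mmu_context.h hunks above give every mm a 64-bit context id that is never reused: the global counter starts at 1 (id 1 belongs to init_mm via INIT_MM_CONTEXT), so 0 can safely mean "no previous mm". A minimal userspace sketch of the same allocation scheme using C11 atomics (the struct and function names are stand-ins, not the kernel's):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Starts at 1: id 1 is reserved for "init_mm", 0 is never handed out. */
    static atomic_uint_least64_t last_mm_ctx_id = 1;

    struct mm_ctx {
        uint64_t ctx_id;
    };

    /* Mirrors init_new_context(): atomic64_inc_return() bumps the global
     * counter and uses the post-increment value as the new id. */
    static void new_context(struct mm_ctx *ctx)
    {
        ctx->ctx_id = atomic_fetch_add(&last_mm_ctx_id, 1) + 1;
    }

    int main(void)
    {
        struct mm_ctx a, b;

        new_context(&a);
        new_context(&b);
        printf("a.ctx_id=%llu b.ctx_id=%llu\n",   /* prints 2 and 3 */
               (unsigned long long)a.ctx_id, (unsigned long long)b.ctx_id);
        return 0;
    }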
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 76b058533e47..81a1be326571 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -177,4 +177,41 @@ static inline void indirect_branch_prediction_barrier(void)
 }
 
 #endif /* __ASSEMBLY__ */
+
+/*
+ * Below is used in the eBPF JIT compiler and emits the byte sequence
+ * for the following assembly:
+ *
+ * With retpolines configured:
+ *
+ *    callq do_rop
+ *  spec_trap:
+ *    pause
+ *    lfence
+ *    jmp spec_trap
+ *  do_rop:
+ *    mov %rax,(%rsp)
+ *    retq
+ *
+ * Without retpolines configured:
+ *
+ *    jmp *%rax
+ */
+#ifdef CONFIG_RETPOLINE
+# define RETPOLINE_RAX_BPF_JIT_SIZE	17
+# define RETPOLINE_RAX_BPF_JIT()				\
+	EMIT1_off32(0xE8, 7);	 /* callq do_rop */		\
+	/* spec_trap: */					\
+	EMIT2(0xF3, 0x90);	 /* pause */			\
+	EMIT3(0x0F, 0xAE, 0xE8); /* lfence */			\
+	EMIT2(0xEB, 0xF9);	 /* jmp spec_trap */		\
+	/* do_rop: */						\
+	EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */	\
+	EMIT1(0xC3);		 /* retq */
+#else
+# define RETPOLINE_RAX_BPF_JIT_SIZE	2
+# define RETPOLINE_RAX_BPF_JIT()				\
+	EMIT2(0xFF, 0xE0);	 /* jmp *%rax */
+#endif
+
 #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 94146f665a3c..99185a064978 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -68,6 +68,8 @@ static inline void invpcid_flush_all_nonglobals(void)
 struct tlb_state {
 	struct mm_struct *active_mm;
 	int state;
+	/* last user mm's ctx id */
+	u64 last_ctx_id;
 
 	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index b5229abd1629..4922ab66fd29 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -93,8 +93,12 @@ out_data:
 	return NULL;
 }
 
-static void free_apic_chip_data(struct apic_chip_data *data)
+static void free_apic_chip_data(unsigned int virq, struct apic_chip_data *data)
 {
+#ifdef CONFIG_X86_IO_APIC
+	if (virq < nr_legacy_irqs())
+		legacy_irq_data[virq] = NULL;
+#endif
 	if (data) {
 		free_cpumask_var(data->domain);
 		free_cpumask_var(data->old_domain);
@@ -318,11 +322,7 @@ static void x86_vector_free_irqs(struct irq_domain *domain,
 			apic_data = irq_data->chip_data;
 			irq_domain_reset_irq_data(irq_data);
 			raw_spin_unlock_irqrestore(&vector_lock, flags);
-			free_apic_chip_data(apic_data);
-#ifdef CONFIG_X86_IO_APIC
-			if (virq + i < nr_legacy_irqs())
-				legacy_irq_data[virq + i] = NULL;
-#endif
+			free_apic_chip_data(virq + i, apic_data);
 		}
 	}
 }
@@ -363,7 +363,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 		err = assign_irq_vector_policy(virq + i, node, data, info);
 		if (err) {
 			irq_data->chip_data = NULL;
-			free_apic_chip_data(data);
+			free_apic_chip_data(virq + i, data);
 			goto error;
 		}
 	}
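The RETPOLINE_RAX_BPF_JIT() macro in the nospec-branch.h hunk above emits the call/pause-lfence/mov-ret sequence as raw bytes. The sketch below is a hedged userspace rendition of that sequence in inline assembly: the CALL jumps to label 2 while pushing the address of the trap at label 1 as its return address; the MOV then overwrites that return slot with the real target, so the RET branches there architecturally while the return predictor sends speculation into the pause/lfence loop. Assumes GCC or Clang on x86-64; hello() exits rather than returning because a jmp-style retpoline leaves no valid return path in this toy setting:

    #include <stdio.h>
    #include <stdlib.h>

    static void hello(void)
    {
        puts("reached via retpoline");
        exit(0);    /* a plain return would unbalance the stack here */
    }

    int main(void)
    {
        void (*target)(void) = hello;

        asm volatile("movq %0, %%rax\n\t"
                     "call 2f\n"                /* push &1f, jump to 2f */
                     "1:\n\t"
                     "pause\n\t"                /* speculation trap */
                     "lfence\n\t"
                     "jmp 1b\n"
                     "2:\n\t"
                     "movq %%rax, (%%rsp)\n\t"  /* overwrite return address */
                     "ret"                      /* architecturally: jmp *%rax */
                     :
                     : "r" (target)
                     : "rax", "memory");
        return 1;   /* never reached */
    }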
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index be644afab1bb..24d2a3ee743f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -44,6 +44,7 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
+#include <asm/microcode.h>
 #include <asm/nospec-branch.h>
 #include <asm/virtext.h>
 
@@ -4919,7 +4920,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	 * being speculatively taken.
 	 */
 	if (svm->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
 
 	asm volatile (
 		"push %%" _ASM_BP "; \n\t"
@@ -5028,11 +5029,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
 	 * save it.
 	 */
-	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-		rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+		svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
 	if (svm->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
 	/* Eliminate branch target predictions from guest mode */
 	vmexit_fill_RSB();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c51aaac953b4..0f3bb4632310 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -49,6 +49,7 @@
 #include <asm/kexec.h>
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
+#include <asm/microcode.h>
 #include <asm/nospec-branch.h>
 
 #include "trace.h"
@@ -8906,7 +8907,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 * being speculatively taken.
 	 */
 	if (vmx->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
@@ -9041,11 +9042,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
 	 * save it.
 	 */
-	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+		vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
 	if (vmx->spec_ctrl)
-		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
 	/* Eliminate branch target predictions from guest mode */
 	vmexit_fill_RSB();
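The svm.c/vmx.c hunks switch the vcpu_run() paths from the paravirt-aware wrmsrl()/rdmsrl() wrappers to native_wrmsrl()/native_read_msr(), and mark the bitmap-miss path unlikely(). The ordering is the important part: restore the guest value (only if non-zero) before entry, read the possibly guest-modified value back after exit when writes are not intercepted, then force the host back to 0 before running any host indirect branch. A self-contained C sketch of that ordering; msr_read(), msr_write(), run_guest() and struct vcpu are simulation stand-ins, not KVM APIs:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MSR_IA32_SPEC_CTRL 0x48

    static uint64_t fake_msr;   /* stands in for the real MSR */

    static uint64_t msr_read(uint32_t msr)  { (void)msr; return fake_msr; }
    static void msr_write(uint32_t msr, uint64_t val) { (void)msr; fake_msr = val; }

    struct vcpu {
        uint64_t spec_ctrl;     /* guest view of SPEC_CTRL */
        bool write_intercepted; /* do we trap guest WRMSR? */
    };

    /* Simulated guest: with the MSR passed through, it can change the
     * value behind the host's back. */
    static void run_guest(struct vcpu *v)
    {
        if (!v->write_intercepted)
            fake_msr = 1;       /* guest enabled IBRS */
    }

    static void vcpu_run(struct vcpu *v)
    {
        /* Restore the guest value before entry; skip the expensive
         * WRMSR when it is 0, the host's normal state. */
        if (v->spec_ctrl)
            msr_write(MSR_IA32_SPEC_CTRL, v->spec_ctrl);

        run_guest(v);

        /* Rare path: no intercept means the MSR may now hold a guest
         * value, so read it out (hence the unlikely() in the hunks). */
        if (!v->write_intercepted)
            v->spec_ctrl = msr_read(MSR_IA32_SPEC_CTRL);

        /* Return the host to SPEC_CTRL=0 before any indirect branch. */
        if (v->spec_ctrl)
            msr_write(MSR_IA32_SPEC_CTRL, 0);
    }

    int main(void)
    {
        struct vcpu v = { .spec_ctrl = 0, .write_intercepted = false };

        vcpu_run(&v);
        printf("saved guest SPEC_CTRL=%llu, host MSR=%llu\n",  /* 1 and 0 */
               (unsigned long long)v.spec_ctrl,
               (unsigned long long)fake_msr);
        return 0;
    }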
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 578973ade71b..eac92e2d171b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -10,6 +10,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
 #include <asm/cache.h>
 #include <asm/apic.h>
 #include <asm/uv/uv.h>
@@ -29,6 +30,8 @@
  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
+atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+
 struct flush_tlb_info {
 	struct mm_struct *flush_mm;
 	unsigned long flush_start;
@@ -104,6 +107,28 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	unsigned cpu = smp_processor_id();
 
 	if (likely(prev != next)) {
+		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
+
+		/*
+		 * Avoid user/user BTB poisoning by flushing the branch
+		 * predictor when switching between processes. This stops
+		 * one process from doing Spectre-v2 attacks on another.
+		 *
+		 * As an optimization, flush indirect branches only when
+		 * switching into processes that disable dumping. This
+		 * protects high value processes like gpg, without having
+		 * too high performance overhead. IBPB is *expensive*!
+		 *
+		 * This will not flush branches when switching into kernel
+		 * threads. It will also not flush if we switch to idle
+		 * thread and back to the same process. It will flush if we
+		 * switch to a different non-dumpable process.
+		 */
+		if (tsk && tsk->mm &&
+		    tsk->mm->context.ctx_id != last_ctx_id &&
+		    get_dumpable(tsk->mm) != SUID_DUMP_USER)
+			indirect_branch_prediction_barrier();
+
 		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
 			/*
 			 * If our current stack is in vmalloc space and isn't
@@ -118,6 +143,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
 		}
 
+		/*
+		 * Record last user mm's context id, so we can avoid
+		 * flushing branch buffer with IBPB if we switch back
+		 * to the same user.
+		 */
+		if (next != &init_mm)
+			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+
 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		this_cpu_write(cpu_tlbstate.active_mm, next);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7840331d3056..1f7ed2ed6ff7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -12,6 +12,7 @@
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
 #include <asm/cacheflush.h>
+#include <asm/nospec-branch.h>
 #include <linux/bpf.h>
 
 int bpf_jit_enable __read_mostly;
@@ -281,7 +282,7 @@ static void emit_bpf_tail_call(u8 **pprog)
 	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
 	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
 	      offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 43 /* number of bytes to jump */
+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
 	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
 	label1 = cnt;
 
@@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog)
 	 */
 	EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
 	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 32
+#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
 	EMIT2(X86_JA, OFFSET2);                   /* ja out */
 	label2 = cnt;
 	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
@@ -304,7 +305,7 @@ static void emit_bpf_tail_call(u8 **pprog)
 	 * goto out;
 	 */
 	EMIT3(0x48, 0x85, 0xC0);                  /* test rax,rax */
-#define OFFSET3 10
+#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
 	EMIT2(X86_JE, OFFSET3);                   /* je out */
 	label3 = cnt;
 
@@ -317,7 +318,7 @@ static void emit_bpf_tail_call(u8 **pprog)
 	 * rdi == ctx (1st arg)
 	 * rax == prog->bpf_func + prologue_size
 	 */
-	EMIT2(0xFF, 0xE0);                        /* jmp rax */
+	RETPOLINE_RAX_BPF_JIT();
 
 	/* out: */
 	BUILD_BUG_ON(cnt - label1 != OFFSET1);
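In the bpf_jit_comp.c hunk, each hard-coded forward-jump constant shrinks by 2 (the size of the old jmp *%rax) and grows by RETPOLINE_RAX_BPF_JIT_SIZE, because an x86 rel8 displacement must equal the exact number of bytes emitted between the jump and its target; BUILD_BUG_ON(cnt - label != OFFSET) enforces that at build time. A small standalone sketch of the same bookkeeping (byte values taken from the hunk; the 8-byte filler stands in for the real instructions between label3 and the indirect jump):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RETPOLINE_SIZE 17               /* bytes of the retpoline sequence */
    #define OFFSET3 (8 + RETPOLINE_SIZE)    /* bytes between "je" and "out:" */

    int main(void)
    {
        uint8_t image[64];
        int cnt = 0, label3;

        /* EMIT-style helpers boil down to appending bytes and counting. */
        image[cnt++] = 0x48; image[cnt++] = 0x85; image[cnt++] = 0xC0; /* test rax,rax */
        image[cnt++] = 0x74; image[cnt++] = OFFSET3;                   /* je out */
        label3 = cnt;

        /* ... 8 bytes of setup, then the 17-byte retpoline ... */
        cnt += 8 + RETPOLINE_SIZE;

        /* The same check the kernel does with BUILD_BUG_ON(): */
        assert(cnt - label3 == OFFSET3);
        printf("offset %d matches emitted bytes\n", cnt - label3);
        return 0;
    }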
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 28c04123b6dd..842ca3cab6b3 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -472,7 +472,7 @@ static int nmi_setup(void)
 		goto fail;
 
 	for_each_possible_cpu(cpu) {
-		if (!cpu)
+		if (!IS_ENABLED(CONFIG_SMP) || !cpu)
 			continue;
 
 		memcpy(per_cpu(cpu_msrs, cpu).counters,
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 7850128f0026..834783bc6752 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -79,7 +79,7 @@ static void intel_mid_power_off(void)
 
 static void intel_mid_reboot(void)
 {
-	intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+	intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
 }
 
 static unsigned long __init intel_mid_calibrate_tsc(void)
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 7f664c416faf..4ecd0de08557 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,11 +1,14 @@
 #include <linux/types.h>
 #include <linux/tick.h>
+#include <linux/percpu-defs.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
 #include <xen/events.h>
 
+#include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 #include <asm/fixmap.h>
@@ -68,6 +71,8 @@ static void xen_pv_post_suspend(int suspend_cancelled)
 		xen_mm_unpin_all();
 }
 
+static DEFINE_PER_CPU(u64, spec_ctrl);
+
 void xen_arch_pre_suspend(void)
 {
 	if (xen_pv_domain())
@@ -84,6 +89,9 @@ void xen_arch_post_suspend(int cancelled)
 
 static void xen_vcpu_notify_restore(void *data)
 {
+	if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+		wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
+
 	/* Boot processor notified via generic timekeeping_resume() */
 	if (smp_processor_id() == 0)
 		return;
@@ -93,7 +101,15 @@ static void xen_vcpu_notify_restore(void *data)
 
 static void xen_vcpu_notify_suspend(void *data)
 {
+	u64 tmp;
+
 	tick_suspend_local();
+
+	if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
+		rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
+		this_cpu_write(spec_ctrl, tmp);
+		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+	}
 }
 
 void xen_arch_resume(void)
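The xen/suspend.c hunk applies a save/clear/restore pattern per CPU: stash SPEC_CTRL in a per-CPU variable, write 0 for the suspend blackout, and write the saved value back on restore. A minimal single-threaded C sketch of the pattern; fake_msr and the function names are stand-ins for the real MSR and the Xen notify hooks:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t fake_msr = 0x1;     /* pretend IBRS is enabled */
    static uint64_t saved_spec_ctrl;    /* per-CPU slot in the kernel */

    static void notify_suspend(void)
    {
        saved_spec_ctrl = fake_msr;     /* rdmsrl + this_cpu_write */
        fake_msr = 0;                   /* wrmsrl(..., 0): safe state */
    }

    static void notify_restore(void)
    {
        fake_msr = saved_spec_ctrl;     /* wrmsrl(this_cpu_read(...)) */
    }

    int main(void)
    {
        notify_suspend();
        printf("across suspend: %#llx\n", (unsigned long long)fake_msr); /* 0 */
        notify_restore();
        printf("after restore:  %#llx\n", (unsigned long long)fake_msr); /* 0x1 */
        return 0;
    }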