KVM: VMX: Tell the nested hypervisor to skip L1D flush on vmentry

commit 5b76a3cff011df2dcb6186c965a2e4d809a05ad4 upstream When nested virtualization is in use, VMENTER operations from the nested hypervisor into the nested guest will always be processed by the bare metal hypervisor, and KVM's "conditional cache flushes" mode in particular does a flush on nested vmentry. Therefore, include the "skip L1D flush on vmentry" bit in KVM's suggested ARCH_CAPABILITIES setting. Add the relevant Documentation. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Paolo Bonzini <pbonzini@redhat.com> 2018-08-05 16:07:47 +0200
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2018-08-15 18:14:53 +0200
commit: f56c8ee659c926bdba42c0d45405433e1a00eb2e (patch)
tree: 2c913d171a8c02b0d4749fe3815341b0026002c2 /arch/x86
parent: 383f160027af7f3e3c32c2988980652e708a2119 (diff)
3 files changed, 27 insertions, 3 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 631c3f59109a..22a0ccb17ad0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1346,6 +1346,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
 void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					   unsigned long address);
 
+u64 kvm_get_arch_capabilities(void);
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 46f271803e6f..12826607a995 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5461,8 +5461,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		++vmx->nmsrs;
 	}
 
-	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
+	vmx->arch_capabilities = kvm_get_arch_capabilities();
 
 	vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 62593b740c82..203d42340fc1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1020,11 +1020,35 @@ static u32 msr_based_features[] = {
 
 static unsigned int num_msr_based_features;
 
+u64 kvm_get_arch_capabilities(void)
+{
+	u64 data;
+
+	rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
+
+	/*
+	 * If we're doing cache flushes (either "always" or "cond")
+	 * we will do one whenever the guest does a vmlaunch/vmresume.
+	 * If an outer hypervisor is doing the cache flush for us
+	 * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that
+	 * capability to the guest too, and if EPT is disabled we're not
+	 * vulnerable.  Overall, only VMENTER_L1D_FLUSH_NEVER will
+	 * require a nested hypervisor to do a flush of its own.
+	 */
+	if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
+		data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
+
+	return data;
+}
+EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
+
 static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
 {
 	switch (msr->index) {
-	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_ARCH_CAPABILITIES:
+		msr->data = kvm_get_arch_capabilities();
+		break;
+	case MSR_IA32_UCODE_REV:
 		rdmsrl_safe(msr->index, &msr->data);
 		break;
 	default:
author	Paolo Bonzini <pbonzini@redhat.com>	2018-08-05 16:07:47 +0200
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2018-08-15 18:14:53 +0200
commit	f56c8ee659c926bdba42c0d45405433e1a00eb2e (patch)
tree	2c913d171a8c02b0d4749fe3815341b0026002c2 /arch/x86
parent	383f160027af7f3e3c32c2988980652e708a2119 (diff)