From 107d6d2efa9eb8c48d050936d8019230ac6b24cd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 May 2008 14:58:26 +0300 Subject: KVM: x86 emulator: fix writes to registers with modrm encodings A register destination encoded with a mod=3 encoding left dst.ptr NULL. Normally we don't trap writes to registers, but in the case of smsw, we do. Fix by pointing dst.ptr at the destination register. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index f2a696d6a243..8a96320ab071 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -677,8 +677,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, c->use_modrm_ea = 1; if (c->modrm_mod == 3) { - c->modrm_val = *(unsigned long *) - decode_register(c->modrm_rm, c->regs, c->d & ByteOp); + c->modrm_ptr = decode_register(c->modrm_rm, + c->regs, c->d & ByteOp); + c->modrm_val = *(unsigned long *)c->modrm_ptr; return rc; } @@ -1005,6 +1006,7 @@ done_prefixes: if ((c->d & ModRM) && c->modrm_mod == 3) { c->src.type = OP_REG; c->src.val = c->modrm_val; + c->src.ptr = c->modrm_ptr; break; } c->src.type = OP_MEM; @@ -1049,6 +1051,7 @@ done_prefixes: if ((c->d & ModRM) && c->modrm_mod == 3) { c->dst.type = OP_REG; c->dst.val = c->dst.orig_val = c->modrm_val; + c->dst.ptr = c->modrm_ptr; break; } c->dst.type = OP_MEM; -- cgit v1.2.3 From eedaa4e2af681a266c084c410238855bdfbc2787 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 6 May 2008 13:32:54 -0300 Subject: KVM: PIT: take inject_pending into account when emulating hlt Otherwise hlt emulation fails if PIT is not injecting IRQ's. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 3324d90038e4..7c077a9d9777 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -216,7 +216,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu) { struct kvm_pit *pit = vcpu->kvm->arch.vpit; - if (pit && vcpu->vcpu_id == 0) + if (pit && vcpu->vcpu_id == 0 && pit->pit_state.inject_pending) return atomic_read(&pit->pit_state.pit_timer.pending); return 0; -- cgit v1.2.3 From 54aaacee35afd594bba3244c20b02cc98d80a961 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 14 May 2008 02:29:06 -0300 Subject: KVM: LAPIC: ignore pending timers if LVTT is disabled Only use the APIC pending timers count to break out of HLT emulation if the timer vector is enabled. Certain configurations of Windows simply mask out the vector without disabling the timer. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 36809d79788b..c297c50eba63 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -957,7 +957,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) { struct kvm_lapic *lapic = vcpu->arch.apic; - if (lapic) + if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) return atomic_read(&lapic->timer.pending); return 0; -- cgit v1.2.3 From 2ddfd20e7c55421435cbf95a5ed3dd6e423cf934 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 22 May 2008 10:37:48 +0200 Subject: namespacecheck: automated fixes Signed-off-by: Ingo Molnar --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 36c5406b1813..7246b60afb96 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1996,7 +1996,7 @@ static struct shrinker mmu_shrinker = { .seeks = DEFAULT_SEEKS * 10, }; -void mmu_destroy_caches(void) +static void mmu_destroy_caches(void) { if (pte_chain_cache) kmem_cache_destroy(pte_chain_cache); -- cgit v1.2.3 From 33e3885de25148e00595c4dd808d6eb15db2edcf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 21 May 2008 15:34:25 +0300 Subject: KVM: x86 emulator: fix hypercall return value on AMD The hypercall instructions on Intel and AMD are different. KVM allows the guest to choose one or the other (the default is Intel), and if the guest chooses incorrectly, KVM will patch it at runtime to select the correct instruction. This allows live migration between Intel and AMD machines. This patching occurs in the x86 emulator. The current code also executes the hypercall. Unfortunately, the tail end of the x86 emulator code also executes, overwriting the return value of the hypercall with the original contents of rax (which happens to be the hypercall number). Fix not by executing the hypercall in the emulator context; instead let the guest reissue the patched instruction and execute the hypercall via the normal path. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 8a96320ab071..932f216d890c 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -1727,7 +1727,8 @@ twobyte_insn: if (rc) goto done; - kvm_emulate_hypercall(ctxt->vcpu); + /* Let the processor re-execute the fixed hypercall */ + c->eip = ctxt->vcpu->arch.rip; /* Disable writeback. */ c->dst.type = OP_NONE; break; -- cgit v1.2.3 From 2f5997140f22f68f6390c49941150d3fa8a95cb7 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 27 May 2008 12:10:20 -0300 Subject: KVM: migrate PIT timer Migrate the PIT timer to the physical CPU which vcpu0 is scheduled on, similarly to what is done for the LAPIC timers, otherwise PIT interrupts will be delayed until an unrelated event causes an exit. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 14 +++++++++++++- arch/x86/kvm/irq.c | 6 ++++++ arch/x86/kvm/irq.h | 2 ++ arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 2 +- 6 files changed, 24 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 7c077a9d9777..f2f5d260874e 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -200,7 +200,6 @@ int __pit_timer_fn(struct kvm_kpit_state *ps) atomic_inc(&pt->pending); smp_mb__after_atomic_inc(); - /* FIXME: handle case where the guest is in guest mode */ if (vcpu0 && waitqueue_active(&vcpu0->wq)) { vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; wake_up_interruptible(&vcpu0->wq); @@ -237,6 +236,19 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) return HRTIMER_NORESTART; } +void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_pit *pit = vcpu->kvm->arch.vpit; + struct hrtimer *timer; + + if (vcpu->vcpu_id != 0 || !pit) + return; + + timer = &pit->pit_state.pit_timer.timer; + if (hrtimer_cancel(timer)) + hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); +} + static void destroy_pit_timer(struct kvm_kpit_timer *pt) { pr_debug("pit: execute del timer!\n"); diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index ce1f583459b1..76d736b5f664 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -94,3 +94,9 @@ void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) /* TODO: PIT, RTC etc. */ } EXPORT_SYMBOL_GPL(kvm_timer_intr_post); + +void __kvm_migrate_timers(struct kvm_vcpu *vcpu) +{ + __kvm_migrate_apic_timer(vcpu); + __kvm_migrate_pit_timer(vcpu); +} diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 1802134b836f..2a15be2275c0 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -84,6 +84,8 @@ void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); +void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); +void __kvm_migrate_timers(struct kvm_vcpu *vcpu); int pit_has_pending_timer(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ab22615eee89..6b0d5fa5bab3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -688,7 +688,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) delta = vcpu->arch.host_tsc - tsc_this; svm->vmcb->control.tsc_offset += delta; vcpu->cpu = cpu; - kvm_migrate_apic_timer(vcpu); + kvm_migrate_timers(vcpu); } for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bfe4db11989c..96445f341519 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -608,7 +608,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu->cpu != cpu) { vcpu_clear(vmx); - kvm_migrate_apic_timer(vcpu); + kvm_migrate_timers(vcpu); vpid_sync_vcpu_all(vmx); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 21338bdb28ff..00acf1301a15 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2758,7 +2758,7 @@ again: if (vcpu->requests) { if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) - __kvm_migrate_apic_timer(vcpu); + __kvm_migrate_timers(vcpu); if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests)) { kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; -- cgit v1.2.3 From e693d71b46e64536581bf4884434fc1b8797e96f Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Sun, 1 Jun 2008 20:24:40 -0700 Subject: KVM: VMX: Clear CR4.VMXE in hardware_disable Clear CR4.VMXE in hardware_disable. There's no reason to leave it set after doing a VMXOFF. VMware Workstation 6.5 checks CR4.VMXE as a proxy for whether the CPU is in VMX mode, so leaving VMXE set means we'll refuse to power on. With this change the user can power on after unloading the kvm-intel module. I tested on kvm-67 and kvm-69. Signed-off-by: Eli Collins Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 96445f341519..02efbe75f317 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1036,6 +1036,7 @@ static void hardware_enable(void *garbage) static void hardware_disable(void *garbage) { asm volatile (ASM_VMX_VMXOFF : : : "cc"); + write_cr4(read_cr4() & ~X86_CR4_VMXE); } static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, -- cgit v1.2.3 From 8d2d73b9a5c35f2c6abf427afba7888cfc4cc65d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 4 Jun 2008 18:42:24 +0300 Subject: KVM: MMU: reschedule during shadow teardown Shadows for large guests can take a long time to tear down, so reschedule occasionally to avoid softlockup warnings. Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7246b60afb96..c2fd6a4a58c3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1858,6 +1858,7 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu) sp = container_of(vcpu->kvm->arch.active_mmu_pages.next, struct kvm_mmu_page, link); kvm_mmu_zap_page(vcpu->kvm, sp); + cond_resched(); } free_page((unsigned long)vcpu->arch.mmu.pae_root); } -- cgit v1.2.3 From ebb0e6264c7a65c51feb3575e9edb58eab0cf469 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 20 May 2008 16:21:58 +0300 Subject: KVM: MMU: Fix printk() format string Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 156fe10288ae..934c7b619396 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -418,7 +418,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, /* mmio */ if (is_error_pfn(pfn)) { - pgprintk("gfn %x is mmio\n", walker.gfn); + pgprintk("gfn %lx is mmio\n", walker.gfn); kvm_release_pfn_clean(pfn); return 1; } -- cgit v1.2.3 From 3c9155106d589584f67b026ec444e69c4a68d7dc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 20 May 2008 16:21:13 +0300 Subject: KVM: MMU: Fix is_empty_shadow_page() check The check is only looking at one of two possible empty ptes. Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c2fd6a4a58c3..ee3f53098f0c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -658,7 +658,7 @@ static int is_empty_shadow_page(u64 *spt) u64 *end; for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) - if (*pos != shadow_trap_nonpresent_pte) { + if (is_shadow_present_pte(*pos)) { printk(KERN_ERR "%s: %p %llx\n", __func__, pos, *pos); return 0; -- cgit v1.2.3