From 5ff22e7ebf2e75e6300ad968a6e529b5e70877f1 Mon Sep 17 00:00:00 2001 From: Nicholas Krause Date: Thu, 18 Dec 2014 21:13:22 -0500 Subject: KVM: x86: Remove FIXMEs in emulate.c for the function,task_switch_32 Remove FIXME comments about needing fault addresses to be returned. These are propaagated from walk_addr_generic to gva_to_gpa and from there to ops->read_std and ops->write_std. Signed-off-by: Nicholas Krause Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 169b09d76ddd..feaba468cce6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2750,7 +2750,6 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; save_state_to_tss32(ctxt, &tss_seg); @@ -2759,13 +2758,11 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip, ldt_sel_offset - eip_offset, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; if (old_tss_sel != 0xffff) { @@ -2776,7 +2773,6 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, sizeof tss_seg.prev_task_link, &ctxt->exception); if (ret != X86EMUL_CONTINUE) - /* FIXME: need to provide precise fault address */ return ret; } -- cgit v1.2.3 From 3313bc4ee83c4e2870d8e83800c6064b0d215679 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Dec 2014 02:52:17 +0200 Subject: KVM: x86: pop sreg accesses only 2 bytes Although pop sreg updates RSP according to the operand size, only 2 bytes are read. The current behavior may result in incorrect #GP or #PF exceptions. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index feaba468cce6..abe95d2e6848 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1828,12 +1828,14 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) unsigned long selector; int rc; - rc = emulate_pop(ctxt, &selector, ctxt->op_bytes); + rc = emulate_pop(ctxt, &selector, 2); if (rc != X86EMUL_CONTINUE) return rc; if (ctxt->modrm_reg == VCPU_SREG_SS) ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; + if (ctxt->op_bytes > 2) + rsp_increment(ctxt, ctxt->op_bytes - 2); rc = load_segment_descriptor(ctxt, (u16)selector, seg); return rc; -- cgit v1.2.3 From 16bebefe29d8495c89961a9d57ea1947547a5211 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Dec 2014 02:52:18 +0200 Subject: KVM: x86: fnstcw and fnstsw may cause spurious exception Since the operand size of fnstcw and fnstsw is updated during the execution, the emulation may cause spurious exceptions as it reads the memory beforehand. Marking these instructions as Mov (since the previous value is ignored) and DstMem16 to simplify the setting of operand size. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index abe95d2e6848..fff11885a3a0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -86,6 +86,7 @@ #define DstAcc (OpAcc << DstShift) #define DstDI (OpDI << DstShift) #define DstMem64 (OpMem64 << DstShift) +#define DstMem16 (OpMem16 << DstShift) #define DstImmUByte (OpImmUByte << DstShift) #define DstDX (OpDX << DstShift) #define DstAccLo (OpAccLo << DstShift) @@ -1057,8 +1058,6 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) asm volatile("fnstcw %0": "+m"(fcw)); ctxt->ops->put_fpu(ctxt); - /* force 2 byte destination */ - ctxt->dst.bytes = 2; ctxt->dst.val = fcw; return X86EMUL_CONTINUE; @@ -1075,8 +1074,6 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) asm volatile("fnstsw %0": "+m"(fsw)); ctxt->ops->put_fpu(ctxt); - /* force 2 byte destination */ - ctxt->dst.bytes = 2; ctxt->dst.val = fsw; return X86EMUL_CONTINUE; @@ -3863,7 +3860,7 @@ static const struct gprefix pfx_0f_e7 = { }; static const struct escape escape_d9 = { { - N, N, N, N, N, N, N, I(DstMem, em_fnstcw), + N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw), }, { /* 0xC0 - 0xC7 */ N, N, N, N, N, N, N, N, @@ -3905,7 +3902,7 @@ static const struct escape escape_db = { { } }; static const struct escape escape_dd = { { - N, N, N, N, N, N, N, I(DstMem, em_fnstsw), + N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw), }, { /* 0xC0 - 0xC7 */ N, N, N, N, N, N, N, N, -- cgit v1.2.3 From 3dc4bc4f6b9265bd05dda007b07eac5a17da0562 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Dec 2014 02:52:19 +0200 Subject: KVM: x86: JMP/CALL using call- or task-gate causes exception The KVM emulator does not emulate JMP and CALL that target a call gate or a task gate. This patch does not try to implement these scenario as they are presumably rare; yet it returns X86EMUL_UNHANDLEABLE error in such cases instead of generating an exception. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 54 +++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 20 deletions(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fff11885a3a0..1fec3ed86cbf 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -263,6 +263,13 @@ struct instr_dual { #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a #define EFLG_RESERVED_ONE_MASK 2 +enum x86_transfer_type { + X86_TRANSFER_NONE, + X86_TRANSFER_CALL_JMP, + X86_TRANSFER_RET, + X86_TRANSFER_TASK_SWITCH, +}; + static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) { if (!(ctxt->regs_valid & (1 << nr))) { @@ -1472,7 +1479,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, /* Does not support long mode */ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg, u8 cpl, - bool in_task_switch, + enum x86_transfer_type transfer, struct desc_struct *desc) { struct desc_struct seg_desc, old_desc; @@ -1526,11 +1533,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; err_code = selector & 0xfffc; - err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR; + err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR : + GP_VECTOR; /* can't load system descriptor into segment selector */ - if (seg <= VCPU_SREG_GS && !seg_desc.s) + if (seg <= VCPU_SREG_GS && !seg_desc.s) { + if (transfer == X86_TRANSFER_CALL_JMP) + return X86EMUL_UNHANDLEABLE; goto exception; + } if (!seg_desc.p) { err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; @@ -1628,7 +1639,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg) { u8 cpl = ctxt->ops->cpl(ctxt); - return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL); + return __load_segment_descriptor(ctxt, selector, seg, cpl, + X86_TRANSFER_NONE, NULL); } static void write_register_operand(struct operand *op) @@ -2040,7 +2052,8 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); - rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false, + rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, + X86_TRANSFER_CALL_JMP, &new_desc); if (rc != X86EMUL_CONTINUE) return rc; @@ -2129,7 +2142,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) /* Outer-privilege level return is not implemented */ if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) return X86EMUL_UNHANDLEABLE; - rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false, + rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, + X86_TRANSFER_RET, &new_desc); if (rc != X86EMUL_CONTINUE) return rc; @@ -2566,23 +2580,23 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, * it is handled in a context of new task */ ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; @@ -2704,31 +2718,31 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, * it is handled in a context of new task */ ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, - cpl, true, NULL); + cpl, X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, - true, NULL); + X86_TRANSFER_TASK_SWITCH, NULL); if (ret != X86EMUL_CONTINUE) return ret; @@ -3010,8 +3024,8 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS); memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); - rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false, - &new_desc); + rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, + X86_TRANSFER_CALL_JMP, &new_desc); if (rc != X86EMUL_CONTINUE) return X86EMUL_CONTINUE; -- cgit v1.2.3 From 80976dbb5cb2b64480d7d38981b3220887575728 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Dec 2014 02:52:20 +0200 Subject: KVM: x86: em_call_far should return failure result Currently, if em_call_far fails it returns success instead of the resulting error-code. Fix it. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1fec3ed86cbf..8f32c03515ad 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3027,7 +3027,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, X86_TRANSFER_CALL_JMP, &new_desc); if (rc != X86EMUL_CONTINUE) - return X86EMUL_CONTINUE; + return rc; rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); if (rc != X86EMUL_CONTINUE) -- cgit v1.2.3 From ab708099a0617e2c37b26d9ecbb373456057ba9b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Dec 2014 02:52:21 +0200 Subject: KVM: x86: POP [ESP] is not emulated correctly According to Intel SDM: "If the ESP register is used as a base register for addressing a destination operand in memory, the POP instruction computes the effective address of the operand after it increments the ESP register." The current emulation does not behave so. The fix required to waste another of the precious instruction flags and to check the flag in decode_modrm. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8f32c03515ad..cc24b74b7454 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -170,6 +170,7 @@ #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */ #define NearBranch ((u64)1 << 52) /* Near branches */ #define No16 ((u64)1 << 53) /* No 16 bit operand */ +#define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -1227,6 +1228,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, else { modrm_ea += reg_read(ctxt, base_reg); adjust_modrm_seg(ctxt, base_reg); + /* Increment ESP on POP [ESP] */ + if ((ctxt->d & IncSP) && + base_reg == VCPU_REGS_RSP) + modrm_ea += ctxt->op_bytes; } if (index_reg != 4) modrm_ea += reg_read(ctxt, index_reg) << scale; @@ -3758,7 +3763,7 @@ static const struct opcode group1[] = { }; static const struct opcode group1A[] = { - I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, + I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N, }; static const struct opcode group2[] = { -- cgit v1.2.3 From e2cefa746e7e2a1104931d411b6f5de159d98ec6 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Dec 2014 02:52:22 +0200 Subject: KVM: x86: Do not set access bit on accessed segments When segment is loaded, the segment access bit is set unconditionally. In fact, it should be set conditionally, based on whether the segment had the accessed bit set before. In addition, it can improve performance. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cc24b74b7454..e36e1fc5bf85 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1618,10 +1618,13 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (seg_desc.s) { /* mark segment as accessed */ - seg_desc.type |= 1; - ret = write_segment_descriptor(ctxt, selector, &seg_desc); - if (ret != X86EMUL_CONTINUE) - return ret; + if (!(seg_desc.type & 1)) { + seg_desc.type |= 1; + ret = write_segment_descriptor(ctxt, selector, + &seg_desc); + if (ret != X86EMUL_CONTINUE) + return ret; + } } else if (ctxt->mode == X86EMUL_MODE_PROT64) { ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3, sizeof(base3), &ctxt->exception); -- cgit v1.2.3 From edccda7ca7e56b335c70ae512f89d0fdf7fb8c69 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Dec 2014 02:52:23 +0200 Subject: KVM: x86: Access to LDT/GDT that wraparound is incorrect When access to descriptor in LDT/GDT wraparound outside long-mode, the address of the descriptor should be truncated to 32-bit. Citing Intel SDM 2.1.1.1 "Global and Local Descriptor Tables in IA-32e Mode": "GDTR and LDTR registers are expanded to 64-bits wide in both IA-32e sub-modes (64-bit mode and compatibility mode)." So in other cases, we need to truncate. Creating new function to return a pointer to descriptor table to avoid too much code duplication. Signed-off-by: Nadav Amit [Wrap 64-bit check with #ifdef CONFIG_X86_64, to avoid a "right shift count >= width of type" warning and consequent undefined behavior. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e36e1fc5bf85..d949287ed010 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1444,10 +1444,8 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, ops->get_gdt(ctxt, dt); } -/* allowed just for 8 bytes segments */ -static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, - u16 selector, struct desc_struct *desc, - ulong *desc_addr_p) +static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt, + u16 selector, ulong *desc_addr_p) { struct desc_ptr dt; u16 index = selector >> 3; @@ -1458,8 +1456,34 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (dt.size < index * 8 + 7) return emulate_gp(ctxt, selector & 0xfffc); - *desc_addr_p = addr = dt.address + index * 8; - return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, + addr = dt.address + index * 8; + +#ifdef CONFIG_X86_64 + if (addr >> 32 != 0) { + u64 efer = 0; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (!(efer & EFER_LMA)) + addr &= (u32)-1; + } +#endif + + *desc_addr_p = addr; + return X86EMUL_CONTINUE; +} + +/* allowed just for 8 bytes segments */ +static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, + u16 selector, struct desc_struct *desc, + ulong *desc_addr_p) +{ + int rc; + + rc = get_descriptor_ptr(ctxt, selector, desc_addr_p); + if (rc != X86EMUL_CONTINUE) + return rc; + + return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc), &ctxt->exception); } @@ -1467,16 +1491,13 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, struct desc_struct *desc) { - struct desc_ptr dt; - u16 index = selector >> 3; + int rc; ulong addr; - get_descriptor_table_ptr(ctxt, selector, &dt); - - if (dt.size < index * 8 + 7) - return emulate_gp(ctxt, selector & 0xfffc); + rc = get_descriptor_ptr(ctxt, selector, &addr); + if (rc != X86EMUL_CONTINUE) + return rc; - addr = dt.address + index * 8; return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); } -- cgit v1.2.3 From c205fb7d7d4f81e46fc577b707ceb9e356af1456 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Dec 2014 02:52:16 +0200 Subject: KVM: x86: #PF error-code on R/W operations is wrong When emulating an instruction that reads the destination memory operand (i.e., instructions without the Mov flag in the emulator), the operand is first read. If a page-fault is detected in this phase, the error-code which would be delivered to the VM does not indicate that the access that caused the exception is a write one. This does not conform with real hardware, and may cause the VM to enter the page-fault handler twice for no reason (once for read, once for write). Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d949287ed010..ef23c1e5fa9f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4909,8 +4909,12 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) /* optimisation - avoid slow emulated read if Mov */ rc = segmented_read(ctxt, ctxt->dst.addr.mem, &ctxt->dst.val, ctxt->dst.bytes); - if (rc != X86EMUL_CONTINUE) + if (rc != X86EMUL_CONTINUE) { + if (rc == X86EMUL_PROPAGATE_FAULT && + ctxt->exception.vector == PF_VECTOR) + ctxt->exception.error_code |= PFERR_WRITE_MASK; goto done; + } } ctxt->dst.orig_val = ctxt->dst.val; -- cgit v1.2.3 From 2fcf5c8ae244b4c298d2111a288d410a719ac626 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 26 Jan 2015 09:32:21 +0200 Subject: KVM: x86: Dirty the dest op page on cmpxchg emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel SDM says for CMPXCHG: "To simplify the interface to the processor’s bus, the destination operand receives a write cycle without regard to the result of the comparison.". This means the destination page should be dirtied. Fix it to by writing back the original value if cmpxchg failed. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ef23c1e5fa9f..aa272545402e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2205,12 +2205,15 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) fastop(ctxt, em_cmp); if (ctxt->eflags & EFLG_ZF) { - /* Success: write back to memory. */ + /* Success: write back to memory; no update of EAX */ + ctxt->src.type = OP_NONE; ctxt->dst.val = ctxt->src.orig_val; } else { /* Failure: write the value we saw to EAX. */ - ctxt->dst.type = OP_REG; - ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); + ctxt->src.type = OP_REG; + ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); + ctxt->src.val = ctxt->dst.orig_val; + /* Create write-cycle to dest by writing the same value */ ctxt->dst.val = ctxt->dst.orig_val; } return X86EMUL_CONTINUE; @@ -4157,7 +4160,7 @@ static const struct opcode twobyte_table[256] = { F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul), /* 0xB0 - 0xB7 */ - I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), + I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg), I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), -- cgit v1.2.3 From 16794aaaab66fa74ab19588a8e255a460e8b3ace Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 26 Jan 2015 09:32:22 +0200 Subject: KVM: x86: Wrong operand size for far ret Indeed, Intel SDM specifically states that for the RET instruction "In 64-bit mode, the default operation size of this instruction is the stack-address size, i.e. 64 bits." However, experiments show this is not the case. Here is for example objdump of small 64-bit asm: 4004f1: ca 14 00 lret $0x14 4004f4: 48 cb lretq 4004f6: 48 ca 14 00 lretq $0x14 Therefore, remove the Stack flag from far-ret instructions. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index aa272545402e..dd7100481aac 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4062,8 +4062,8 @@ static const struct opcode opcode_table[256] = { G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave), - I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm), - I(ImplicitOps | Stack, em_ret_far), + I(ImplicitOps | SrcImmU16, em_ret_far_imm), + I(ImplicitOps, em_ret_far), D(ImplicitOps), DI(SrcImmByte, intn), D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), /* 0xD0 - 0xD7 */ -- cgit v1.2.3 From 801806d956c2c198b9fdd3afd156a536f9a3a139 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 26 Jan 2015 09:32:23 +0200 Subject: KVM: x86: IRET emulation does not clear NMI masking The IRET instruction should clear NMI masking, but the current implementation does not do so. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index dd7100481aac..fa3ca55a50c6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2047,6 +2047,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */ ctxt->eflags |= EFLG_RESERVED_ONE_MASK; + ctxt->ops->set_nmi_mask(ctxt, false); return rc; } -- cgit v1.2.3 From 2276b5116e983277073623cd363954e41674c382 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 26 Jan 2015 09:32:24 +0200 Subject: KVM: x86: ARPL emulation can cause spurious exceptions ARPL and MOVSXD are encoded the same and their execution depends on the execution mode. The operand sizes of each instruction are different. Currently, ARPL is detected too late, after the decoding was already done, and therefore may result in spurious exception (instead of failed emulation). Introduce a group to the emulator to handle instructions according to execution mode (32/64 bits). Note: in order not to make changes that may affect performance, the new ModeDual can only be applied to instructions with ModRM. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fa3ca55a50c6..db3cf399e39e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -125,6 +125,7 @@ #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ #define Escape (5<<15) /* Escape to coprocessor instruction */ #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */ +#define ModeDual (7<<15) /* Different instruction for 32/64 bit */ #define Sse (1<<18) /* SSE Vector instruction */ /* Generic ModRM decode. */ #define ModRM (1<<19) @@ -215,6 +216,7 @@ struct opcode { const struct gprefix *gprefix; const struct escape *esc; const struct instr_dual *idual; + const struct mode_dual *mdual; void (*fastop)(struct fastop *fake); } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); @@ -242,6 +244,11 @@ struct instr_dual { struct opcode mod3; }; +struct mode_dual { + struct opcode mode32; + struct opcode mode64; +}; + /* EFLAGS bit definitions. */ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -3530,6 +3537,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_movsxd(struct x86_emulate_ctxt *ctxt) +{ + ctxt->dst.val = (s32) ctxt->src.val; + return X86EMUL_CONTINUE; +} + static bool valid_cr(int nr) { switch (nr) { @@ -3729,6 +3742,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) } +#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } @@ -3973,6 +3987,10 @@ static const struct instr_dual instr_dual_0f_c3 = { I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N }; +static const struct mode_dual mode_dual_63 = { + N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd) +}; + static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ F6ALU(Lock, em_add), @@ -4007,7 +4025,7 @@ static const struct opcode opcode_table[256] = { /* 0x60 - 0x67 */ I(ImplicitOps | Stack | No64, em_pusha), I(ImplicitOps | Stack | No64, em_popa), - N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , + N, MD(ModRM, &mode_dual_63), N, N, N, N, /* 0x68 - 0x6F */ I(SrcImm | Mov | Stack, em_push), @@ -4227,6 +4245,7 @@ static const struct opcode opcode_map_0f_38[256] = { #undef I #undef GP #undef EXT +#undef MD #undef D2bv #undef D2bvIP @@ -4616,6 +4635,12 @@ done_prefixes: else opcode = opcode.u.idual->mod012; break; + case ModeDual: + if (ctxt->mode == X86EMUL_MODE_PROT64) + opcode = opcode.u.mdual->mode64; + else + opcode = opcode.u.mdual->mode32; + break; default: return EMULATION_FAILED; } @@ -4956,11 +4981,6 @@ special_insn: goto threebyte_insn; switch (ctxt->b) { - case 0x63: /* movsxd */ - if (ctxt->mode != X86EMUL_MODE_PROT64) - goto cannot_emulate; - ctxt->dst.val = (s32) ctxt->src.val; - break; case 0x70 ... 0x7f: /* jcc (short) */ if (test_cc(ctxt->b, ctxt->eflags)) rc = jmp_rel(ctxt, ctxt->src.val); -- cgit v1.2.3 From 2b42fce6954d1730edaf479d02378703e7b821cb Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 26 Jan 2015 09:32:25 +0200 Subject: KVM: x86: Fix defines in emulator.c Unnecassary define was left after commit 7d882ffa81d5 ("KVM: x86: Revert NoBigReal patch in the emulator"). Commit 39f062ff51b2 ("KVM: x86: Generate #UD when memory operand is required") was missing undef. Fix it. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index db3cf399e39e..997c9ebb70ef 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -167,7 +167,6 @@ #define NoMod ((u64)1 << 47) /* Mod field is ignored */ #define Intercept ((u64)1 << 48) /* Has valid intercept field */ #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */ -#define NoBigReal ((u64)1 << 50) /* No big real mode */ #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */ #define NearBranch ((u64)1 << 52) /* Near branches */ #define No16 ((u64)1 << 53) /* No 16 bit operand */ @@ -4246,6 +4245,7 @@ static const struct opcode opcode_map_0f_38[256] = { #undef GP #undef EXT #undef MD +#undef ID #undef D2bv #undef D2bvIP -- cgit v1.2.3 From bac155310be35e0fa64b066d47625d2a12a75122 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 26 Jan 2015 09:32:26 +0200 Subject: KVM: x86: 32-bit wraparound read/write not emulated correctly If we got a wraparound of 32-bit operand, and the limit is 0xffffffff, read and writes should be successful. It just needs to be done in two segments. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 997c9ebb70ef..c3b07574942f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -684,9 +684,13 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, } if (addr.ea > lim) goto bad; - *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); - if (size > *max_size) - goto bad; + if (lim == 0xffffffff) + *max_size = ~0u; + else { + *max_size = (u64)lim + 1 - addr.ea; + if (size > *max_size) + goto bad; + } la &= (u32)-1; break; } -- cgit v1.2.3 From 82268083fa78452c1c8be30a82984e470d9678c7 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 26 Jan 2015 09:32:27 +0200 Subject: KVM: x86: Emulation of call may use incorrect stack size On long-mode, when far call that changes cs.l takes place, the stack size is determined by the new mode. For instance, if we go from 32-bit mode to 64-bit mode, the stack-size if 64. KVM uses the old stack size. Fix it. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c3b07574942f..81dcf7964701 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -741,19 +741,26 @@ static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, const struct desc_struct *cs_desc) { enum x86emul_mode mode = ctxt->mode; + int rc; #ifdef CONFIG_X86_64 - if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) { - u64 efer = 0; + if (ctxt->mode >= X86EMUL_MODE_PROT16) { + if (cs_desc->l) { + u64 efer = 0; - ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); - if (efer & EFER_LMA) - mode = X86EMUL_MODE_PROT64; + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) + mode = X86EMUL_MODE_PROT64; + } else + mode = X86EMUL_MODE_PROT32; /* temporary value */ } #endif if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; - return assign_eip(ctxt, dst, mode); + rc = assign_eip(ctxt, dst, mode); + if (rc == X86EMUL_CONTINUE) + ctxt->mode = mode; + return rc; } static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) @@ -3062,6 +3069,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) struct desc_struct old_desc, new_desc; const struct x86_emulate_ops *ops = ctxt->ops; int cpl = ctxt->ops->cpl(ctxt); + enum x86emul_mode prev_mode = ctxt->mode; old_eip = ctxt->_eip; ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS); @@ -3085,11 +3093,14 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) rc = em_push(ctxt); /* If we failed, we tainted the memory, but the very least we should restore cs */ - if (rc != X86EMUL_CONTINUE) + if (rc != X86EMUL_CONTINUE) { + pr_warn_once("faulting far call emulation tainted memory\n"); goto fail; + } return rc; fail: ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); + ctxt->mode = prev_mode; return rc; } -- cgit v1.2.3 From d44e1212230a68f9dccd1a95b5c8ca5217c62094 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Feb 2015 10:02:05 +0100 Subject: KVM: x86: emulate: correct page fault error code for NoWrite instructions NoWrite instructions (e.g. cmp or test) never set the "write access" bit in the error code, even if one of the operands is treated as a destination. Fixes: c205fb7d7d4f81e46fc577b707ceb9e356af1456 Cc: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/emulate.c') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 81dcf7964701..a943bf0c06d0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4954,7 +4954,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) rc = segmented_read(ctxt, ctxt->dst.addr.mem, &ctxt->dst.val, ctxt->dst.bytes); if (rc != X86EMUL_CONTINUE) { - if (rc == X86EMUL_PROPAGATE_FAULT && + if (!(ctxt->d & NoWrite) && + rc == X86EMUL_PROPAGATE_FAULT && ctxt->exception.vector == PF_VECTOR) ctxt->exception.error_code |= PFERR_WRITE_MASK; goto done; -- cgit v1.2.3