From c565650b1028bc551e5d16dd0ec8f7078da7cace Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 11 Feb 2015 15:15:10 +1030 Subject: lguest: send trap 13 through to userspace. We copy 7 bytes at eip for userspace's instruction decode; we have to carefully handle the case where eip is at the end of a page. We can't leave this to userspace since kernel has all the page table decode logic. The decode logic moves to userspace, basically unchanged. Signed-off-by: Rusty Russell --- drivers/lguest/x86/core.c | 133 +++++++++++++++------------------------------- 1 file changed, 43 insertions(+), 90 deletions(-) (limited to 'drivers/lguest') diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index f7a16b4ea456..42e87bf14113 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -314,95 +314,52 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * usually attached to a PC. * * When the Guest uses one of these instructions, we get a trap (General - * Protection Fault) and come here. We see if it's one of those troublesome - * instructions and skip over it. We return true if we did. + * Protection Fault) and come here. We queue this to be sent out to the + * Launcher to handle. */ -static int emulate_insn(struct lg_cpu *cpu) -{ - u8 insn; - unsigned int insnlen = 0, in = 0, small_operand = 0; - /* - * The eip contains the *virtual* address of the Guest's instruction: - * walk the Guest's page tables to find the "physical" address. - */ - unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); - - /* - * This must be the Guest kernel trying to do something, not userspace! - * The bottom two bits of the CS segment register are the privilege - * level. - */ - if ((cpu->regs->cs & 3) != GUEST_PL) - return 0; - /* Decoding x86 instructions is icky. */ - insn = lgread(cpu, physaddr, u8); - - /* - * Around 2.6.33, the kernel started using an emulation for the - * cmpxchg8b instruction in early boot on many configurations. This - * code isn't paravirtualized, and it tries to disable interrupts. - * Ignore it, which will Mostly Work. - */ - if (insn == 0xfa) { - /* "cli", or Clear Interrupt Enable instruction. Skip it. */ - cpu->regs->eip++; - return 1; +/* + * The eip contains the *virtual* address of the Guest's instruction: + * we copy the instruction here so the Launcher doesn't have to walk + * the page tables to decode it. We handle the case (eg. in a kernel + * module) where the instruction is over two pages, and the pages are + * virtually but not physically contiguous. + * + * The longest possible x86 instruction is 15 bytes, but we don't handle + * anything that strange. + */ +static void copy_from_guest(struct lg_cpu *cpu, + void *dst, unsigned long vaddr, size_t len) +{ + size_t to_page_end = PAGE_SIZE - (vaddr % PAGE_SIZE); + unsigned long paddr; + + BUG_ON(len > PAGE_SIZE); + + /* If it goes over a page, copy in two parts. */ + if (len > to_page_end) { + /* But make sure the next page is mapped! */ + if (__guest_pa(cpu, vaddr + to_page_end, &paddr)) + copy_from_guest(cpu, dst + to_page_end, + vaddr + to_page_end, + len - to_page_end); + else + /* Otherwise fill with zeroes. */ + memset(dst + to_page_end, 0, len - to_page_end); + len = to_page_end; } - /* - * 0x66 is an "operand prefix". It means a 16, not 32 bit in/out. - */ - if (insn == 0x66) { - small_operand = 1; - /* The instruction is 1 byte so far, read the next byte. */ - insnlen = 1; - insn = lgread(cpu, physaddr + insnlen, u8); - } + /* This will kill the guest if it isn't mapped, but that + * shouldn't happen. */ + __lgread(cpu, dst, guest_pa(cpu, vaddr), len); +} - /* - * We can ignore the lower bit for the moment and decode the 4 opcodes - * we need to emulate. - */ - switch (insn & 0xFE) { - case 0xE4: /* in ,%al */ - insnlen += 2; - in = 1; - break; - case 0xEC: /* in (%dx),%al */ - insnlen += 1; - in = 1; - break; - case 0xE6: /* out %al, */ - insnlen += 2; - break; - case 0xEE: /* out %al,(%dx) */ - insnlen += 1; - break; - default: - /* OK, we don't know what this is, can't emulate. */ - return 0; - } - /* - * If it was an "IN" instruction, they expect the result to be read - * into %eax, so we change %eax. We always return all-ones, which - * traditionally means "there's nothing there". - */ - if (in) { - /* Lower bit tells means it's a 32/16 bit access */ - if (insn & 0x1) { - if (small_operand) - cpu->regs->eax |= 0xFFFF; - else - cpu->regs->eax = 0xFFFFFFFF; - } else - cpu->regs->eax |= 0xFF; - } - /* Finally, we've "done" the instruction, so move past it. */ - cpu->regs->eip += insnlen; - /* Success! */ - return 1; +static void setup_emulate_insn(struct lg_cpu *cpu) +{ + cpu->pending.trap = 13; + copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip, + sizeof(cpu->pending.insn)); } /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ @@ -410,14 +367,10 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) { switch (cpu->regs->trapnum) { case 13: /* We've intercepted a General Protection Fault. */ - /* - * Check if this was one of those annoying IN or OUT - * instructions which we need to emulate. If so, we just go - * back into the Guest after we've done it. - */ + /* Hand to Launcher to emulate those pesky IN and OUT insns */ if (cpu->regs->errcode == 0) { - if (emulate_insn(cpu)) - return; + setup_emulate_insn(cpu); + return; } break; case 14: /* We've intercepted a Page Fault. */ -- cgit v1.2.3