summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRusty Russell <rusty@rustcorp.com.au>2011-07-22 14:39:48 +0930
committerRusty Russell <rusty@rustcorp.com.au>2011-07-22 14:39:48 +0930
commit5dea1c88ed11a1221581c4b202f053c4fc138704 (patch)
tree59e15d3c696712e26ffb229ff987f33bcc72affe
parente0377e25206328998d036cafddcd00a7c3252e3e (diff)
lguest: use a special 1:1 linear pagetable mode until first switch.
The Host used to create some page tables for the Guest to use at the top of Guest memory; it would then tell the Guest where this was. In particular, it created linear mappings for 0 and 0xC0000000 addresses because lguest used to switch to its real page tables quite late in boot. However, since d50d8fe19 Linux initialized boot page tables in head_32.S even before the "are we lguest?" boot jump. So, now we can simplify things: the Host pagetable code assumes 1:1 linear mapping until it first calls the LHCALL_NEW_PGTABLE hypercall, which we now do before we reach C code. This also means that the Host doesn't need to know anything about the Guest's PAGE_OFFSET. (Non-Linux guests might not even have such a thing). Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
-rw-r--r--arch/x86/kernel/asm-offsets_32.c1
-rw-r--r--arch/x86/lguest/boot.c11
-rw-r--r--arch/x86/lguest/i386_head.S9
-rw-r--r--drivers/lguest/lg.h2
-rw-r--r--drivers/lguest/page_tables.c278
-rw-r--r--include/linux/lguest.h2
6 files changed, 98 insertions, 205 deletions
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index c29d631af6fc..395a10e68067 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -63,7 +63,6 @@ void foo(void)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending);
- OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
BLANK();
OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index db832fd65ecb..719a32c60516 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -520,17 +520,16 @@ static unsigned long lguest_read_cr2(void)
/* See lguest_set_pte() below. */
static bool cr3_changed = false;
+static unsigned long current_cr3;
/*
* cr3 is the current toplevel pagetable page: the principle is the same as
- * cr0. Keep a local copy, and tell the Host when it changes. The only
- * difference is that our local copy is in lguest_data because the Host needs
- * to set it upon our initial hypercall.
+ * cr0. Keep a local copy, and tell the Host when it changes.
*/
static void lguest_write_cr3(unsigned long cr3)
{
- lguest_data.pgdir = cr3;
lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
+ current_cr3 = cr3;
/* These two page tables are simple, linear, and used during boot */
if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
@@ -539,7 +538,7 @@ static void lguest_write_cr3(unsigned long cr3)
static unsigned long lguest_read_cr3(void)
{
- return lguest_data.pgdir;
+ return current_cr3;
}
/* cr4 is used to enable and disable PGE, but we don't care. */
@@ -758,7 +757,7 @@ static void lguest_pmd_clear(pmd_t *pmdp)
static void lguest_flush_tlb_single(unsigned long addr)
{
/* Simply set it to zero: if it was not, it will fault back in. */
- lazy_hcall3(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0);
+ lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
}
/*
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 4f420c2f2d55..863b03a9fbff 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -27,13 +27,18 @@
.section .init.text, "ax", @progbits
ENTRY(lguest_entry)
/*
- * We make the "initialization" hypercall now to tell the Host about
- * us, and also find out where it put our page tables.
+ * We make the "initialization" hypercall now to tell the Host where
+ * our lguest_data struct is.
*/
movl $LHCALL_LGUEST_INIT, %eax
movl $lguest_data - __PAGE_OFFSET, %ebx
int $LGUEST_TRAP_ENTRY
+ /* Now turn our pagetables on; setup by arch/x86/kernel/head_32.S. */
+ movl $LHCALL_NEW_PGTABLE, %eax
+ movl $(initial_page_table - __PAGE_OFFSET), %ebx
+ int $LGUEST_TRAP_ENTRY
+
/* Set up the initial stack so we can run C code. */
movl $(init_thread_union+THREAD_SIZE),%esp
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 9136411fadd5..295df06e6590 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -59,6 +59,8 @@ struct lg_cpu {
struct lguest_pages *last_pages;
+ /* Initialization mode: linear map everything. */
+ bool linear_pages;
int cpu_pgd; /* Which pgd this cpu is currently using */
/* If a hypercall was asked for, this points to the arguments. */
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index d21578ee95de..00026222bde8 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -17,7 +17,6 @@
#include <linux/percpu.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
-#include <asm/bootparam.h>
#include "lg.h"
/*M:008
@@ -325,10 +324,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
#endif
/* First step: get the top-level Guest page table entry. */
- gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
- /* Toplevel not present? We can't map it in. */
- if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
- return false;
+ if (unlikely(cpu->linear_pages)) {
+ /* Faking up a linear mapping. */
+ gpgd = __pgd(CHECK_GPGD_MASK);
+ } else {
+ gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
+ /* Toplevel not present? We can't map it in. */
+ if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
+ return false;
+ }
/* Now look at the matching shadow entry. */
spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
@@ -353,10 +357,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
}
#ifdef CONFIG_X86_PAE
- gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
- /* Middle level not present? We can't map it in. */
- if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
- return false;
+ if (unlikely(cpu->linear_pages)) {
+ /* Faking up a linear mapping. */
+ gpmd = __pmd(_PAGE_TABLE);
+ } else {
+ gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
+ /* Middle level not present? We can't map it in. */
+ if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
+ return false;
+ }
/* Now look at the matching shadow entry. */
spmd = spmd_addr(cpu, *spgd, vaddr);
@@ -397,8 +406,13 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
#endif
- /* Read the actual PTE value. */
- gpte = lgread(cpu, gpte_ptr, pte_t);
+ if (unlikely(cpu->linear_pages)) {
+ /* Linear? Make up a PTE which points to same page. */
+ gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
+ } else {
+ /* Read the actual PTE value. */
+ gpte = lgread(cpu, gpte_ptr, pte_t);
+ }
/* If this page isn't in the Guest page tables, we can't page it in. */
if (!(pte_flags(gpte) & _PAGE_PRESENT))
@@ -454,7 +468,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
* Finally, we write the Guest PTE entry back: we've set the
* _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
*/
- lgwrite(cpu, gpte_ptr, pte_t, gpte);
+ if (likely(!cpu->linear_pages))
+ lgwrite(cpu, gpte_ptr, pte_t, gpte);
/*
* The fault is fixed, the page table is populated, the mapping
@@ -612,6 +627,11 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
#ifdef CONFIG_X86_PAE
pmd_t gpmd;
#endif
+
+ /* Still not set up? Just map 1:1. */
+ if (unlikely(cpu->linear_pages))
+ return vaddr;
+
/* First step: get the top-level Guest page table entry. */
gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
/* Toplevel not present? We can't map it in. */
@@ -708,32 +728,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
return next;
}
-/*H:430
- * (iv) Switching page tables
- *
- * Now we've seen all the page table setting and manipulation, let's see
- * what happens when the Guest changes page tables (ie. changes the top-level
- * pgdir). This occurs on almost every context switch.
- */
-void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
-{
- int newpgdir, repin = 0;
-
- /* Look to see if we have this one already. */
- newpgdir = find_pgdir(cpu->lg, pgtable);
- /*
- * If not, we allocate or mug an existing one: if it's a fresh one,
- * repin gets set to 1.
- */
- if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
- newpgdir = new_pgdir(cpu, pgtable, &repin);
- /* Change the current pgd index to the new one. */
- cpu->cpu_pgd = newpgdir;
- /* If it was completely blank, we map in the Guest kernel stack */
- if (repin)
- pin_stack_pages(cpu);
-}
-
/*H:470
* Finally, a routine which throws away everything: all PGD entries in all
* the shadow page tables, including the Guest's kernel mappings. This is used
@@ -780,6 +774,44 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu)
/* We need the Guest kernel stack mapped again. */
pin_stack_pages(cpu);
}
+
+/*H:430
+ * (iv) Switching page tables
+ *
+ * Now we've seen all the page table setting and manipulation, let's see
+ * what happens when the Guest changes page tables (ie. changes the top-level
+ * pgdir). This occurs on almost every context switch.
+ */
+void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
+{
+ int newpgdir, repin = 0;
+
+ /*
+ * The very first time they call this, we're actually running without
+ * any page tables; we've been making it up. Throw them away now.
+ */
+ if (unlikely(cpu->linear_pages)) {
+ release_all_pagetables(cpu->lg);
+ cpu->linear_pages = false;
+ /* Force allocation of a new pgdir. */
+ newpgdir = ARRAY_SIZE(cpu->lg->pgdirs);
+ } else {
+ /* Look to see if we have this one already. */
+ newpgdir = find_pgdir(cpu->lg, pgtable);
+ }
+
+ /*
+ * If not, we allocate or mug an existing one: if it's a fresh one,
+ * repin gets set to 1.
+ */
+ if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
+ newpgdir = new_pgdir(cpu, pgtable, &repin);
+ /* Change the current pgd index to the new one. */
+ cpu->cpu_pgd = newpgdir;
+ /* If it was completely blank, we map in the Guest kernel stack */
+ if (repin)
+ pin_stack_pages(cpu);
+}
/*:*/
/*M:009
@@ -919,168 +951,26 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
}
#endif
-/*H:505
- * To get through boot, we construct simple identity page mappings (which
- * set virtual == physical) and linear mappings which will get the Guest far
- * enough into the boot to create its own. The linear mapping means we
- * simplify the Guest boot, but it makes assumptions about their PAGE_OFFSET,
- * as you'll see.
- *
- * We lay them out of the way, just below the initrd (which is why we need to
- * know its size here).
- */
-static unsigned long setup_pagetables(struct lguest *lg,
- unsigned long mem,
- unsigned long initrd_size)
-{
- pgd_t __user *pgdir;
- pte_t __user *linear;
- unsigned long mem_base = (unsigned long)lg->mem_base;
- unsigned int mapped_pages, i, linear_pages;
-#ifdef CONFIG_X86_PAE
- pmd_t __user *pmds;
- unsigned int j;
- pgd_t pgd;
- pmd_t pmd;
-#else
- unsigned int phys_linear;
-#endif
-
- /*
- * We have mapped_pages frames to map, so we need linear_pages page
- * tables to map them.
- */
- mapped_pages = mem / PAGE_SIZE;
- linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE;
-
- /* We put the toplevel page directory page at the top of memory. */
- pgdir = (pgd_t *)(mem + mem_base - initrd_size - PAGE_SIZE);
-
- /* Now we use the next linear_pages pages as pte pages */
- linear = (void *)pgdir - linear_pages * PAGE_SIZE;
-
-#ifdef CONFIG_X86_PAE
- /*
- * And the single mid page goes below that. We only use one, but
- * that's enough to map 1G, which definitely gets us through boot.
- */
- pmds = (void *)linear - PAGE_SIZE;
-#endif
- /*
- * Linear mapping is easy: put every page's address into the
- * mapping in order.
- */
- for (i = 0; i < mapped_pages; i++) {
- pte_t pte;
- pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER));
- if (copy_to_user(&linear[i], &pte, sizeof(pte)) != 0)
- return -EFAULT;
- }
-
-#ifdef CONFIG_X86_PAE
- /*
- * Make the Guest PMD entries point to the corresponding place in the
- * linear mapping (up to one page worth of PMD).
- */
- for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
- i += PTRS_PER_PTE, j++) {
- pmd = pfn_pmd(((unsigned long)&linear[i] - mem_base)/PAGE_SIZE,
- __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
- if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
- return -EFAULT;
- }
-
- /* One PGD entry, pointing to that PMD page. */
- pgd = __pgd(((unsigned long)pmds - mem_base) | _PAGE_PRESENT);
- /* Copy it in as the first PGD entry (ie. addresses 0-1G). */
- if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
- return -EFAULT;
- /*
- * And the other PGD entry to make the linear mapping at PAGE_OFFSET
- */
- if (copy_to_user(&pgdir[KERNEL_PGD_BOUNDARY], &pgd, sizeof(pgd)))
- return -EFAULT;
-#else
- /*
- * The top level points to the linear page table pages above.
- * We setup the identity and linear mappings here.
- */
- phys_linear = (unsigned long)linear - mem_base;
- for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
- pgd_t pgd;
- /*
- * Create a PGD entry which points to the right part of the
- * linear PTE pages.
- */
- pgd = __pgd((phys_linear + i * sizeof(pte_t)) |
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
- /*
- * Copy it into the PGD page at 0 and PAGE_OFFSET.
- */
- if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd))
- || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET)
- + i / PTRS_PER_PTE],
- &pgd, sizeof(pgd)))
- return -EFAULT;
- }
-#endif
-
- /*
- * We return the top level (guest-physical) address: we remember where
- * this is to write it into lguest_data when the Guest initializes.
- */
- return (unsigned long)pgdir - mem_base;
-}
-
/*H:500
* (vii) Setting up the page tables initially.
*
- * When a Guest is first created, the Launcher tells us where the toplevel of
- * its first page table is. We set some things up here:
+ * When a Guest is first created, set initialize a shadow page table which
+ * we will populate on future faults. The Guest doesn't have any actual
+ * pagetables yet, so we set linear_pages to tell demand_page() to fake it
+ * for the moment.
*/
int init_guest_pagetable(struct lguest *lg)
{
- u64 mem;
- u32 initrd_size;
- struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
-#ifdef CONFIG_X86_PAE
- pgd_t *pgd;
- pmd_t *pmd_table;
-#endif
- /*
- * Get the Guest memory size and the ramdisk size from the boot header
- * located at lg->mem_base (Guest address 0).
- */
- if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
- || get_user(initrd_size, &boot->hdr.ramdisk_size))
- return -EFAULT;
+ struct lg_cpu *cpu = &lg->cpus[0];
+ int allocated = 0;
- /*
- * We start on the first shadow page table, and give it a blank PGD
- * page.
- */
- lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size);
- if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir))
- return lg->pgdirs[0].gpgdir;
- lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
- if (!lg->pgdirs[0].pgdir)
+ /* lg (and lg->cpus[]) starts zeroed: this allocates a new pgdir */
+ cpu->cpu_pgd = new_pgdir(cpu, 0, &allocated);
+ if (!allocated)
return -ENOMEM;
-#ifdef CONFIG_X86_PAE
- /* For PAE, we also create the initial mid-level. */
- pgd = lg->pgdirs[0].pgdir;
- pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
- if (!pmd_table)
- return -ENOMEM;
-
- set_pgd(pgd + SWITCHER_PGD_INDEX,
- __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-#endif
-
- /* This is the current page table. */
- lg->cpus[0].cpu_pgd = 0;
+ /* We start with a linear mapping until the initialize. */
+ cpu->linear_pages = true;
return 0;
}
@@ -1095,10 +985,10 @@ void page_table_guest_data_init(struct lg_cpu *cpu)
* of virtual addresses used by the Switcher.
*/
|| put_user(RESERVE_MEM * 1024 * 1024,
- &cpu->lg->lguest_data->reserve_mem)
- || put_user(cpu->lg->pgdirs[0].gpgdir,
- &cpu->lg->lguest_data->pgdir))
+ &cpu->lg->lguest_data->reserve_mem)) {
kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
+ return;
+ }
/*
* In flush_user_mappings() we loop from 0 to
diff --git a/include/linux/lguest.h b/include/linux/lguest.h
index 2fb1dcbcb5aa..9962c6bb1311 100644
--- a/include/linux/lguest.h
+++ b/include/linux/lguest.h
@@ -59,8 +59,6 @@ struct lguest_data {
unsigned long reserve_mem;
/* KHz for the TSC clock. */
u32 tsc_khz;
- /* Page where the top-level pagetable is */
- unsigned long pgdir;
/* Fields initialized by the Guest at boot: */
/* Instruction range to suppress interrupts even if enabled */