author    Vinod Rex <vrex@nvidia.com>    2011-08-26 11:42:40 -0700
committer Simone Willett <swillett@nvidia.com>    2011-09-22 10:04:27 -0700
commit    0e954653476e496979d22c77a3bbe90da42aff52 (patch)
tree      03e90c0a745c0cc3f40e0bd6ed52f286722f4d6f
parent    c14cc78c0ecc008e44784ef28b5ea83eff5e47fb (diff)
arm: mm: change_page_attr support
bug 865816

Adapted from x86 change_page_attr() implementation

(cherry picked from commit b95306beb700ea1dddb228d3f1acfe4efc36f2f6)

Change-Id: I9dfaf9e0e0517840848b5a0fd98f2c3f6ed5d3ca
Reviewed-on: http://git-master/r/53827
Reviewed-by: Vinod Rex <vrex@nvidia.com>
Reviewed-by: Krishna Reddy <vdumpa@nvidia.com>
Tested-by: Krishna Reddy <vdumpa@nvidia.com>
-rw-r--r--  arch/arm/include/asm/cacheflush.h   49
-rw-r--r--  arch/arm/include/asm/page.h          2
-rw-r--r--  arch/arm/include/asm/pgtable.h      20
-rw-r--r--  arch/arm/mm/Makefile                 2
-rw-r--r--  arch/arm/mm/mmu.c                    9
-rw-r--r--  arch/arm/mm/pageattr.c             993
-rw-r--r--  arch/arm/mm/pgd.c                   31
7 files changed, 1105 insertions(+), 1 deletion(-)
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 8c885b6f97f8..d72b4db215a6 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -449,4 +449,53 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
flush_cache_all();
}
+/*
+ * The set_memory_* API can be used to change various attributes of a virtual
+ * address range. The attributes include:
+ * Cacheability  : UnCached, WriteCombining, WriteBack
+ * Executability : eXecutable, NoteXecutable
+ * Read/Write    : ReadOnly, ReadWrite
+ * Presence      : NotPresent
+ *
+ * Within a category, the attributes are mutually exclusive.
+ *
+ * The implementation of this API will take care of various aspects that
+ * are associated with changing such attributes, such as:
+ * - Flushing TLBs
+ * - Flushing CPU caches
+ * - Making sure aliases of the memory behind the mapping don't violate
+ *   coherency rules as defined by the CPU in the system.
+ *
+ * What this API does not do:
+ * - Provide exclusion between various callers - including callers that
+ *   operate on other mappings of the same physical page
+ * - Restore default attributes when a page is freed
+ * - Guarantee that mappings other than the requested one are left in any
+ *   particular state, beyond not violating the coherency rules of the CPU
+ *   in the system. Do not depend on any effects on other mappings; other
+ *   CPUs may have more relaxed rules.
+ * The caller is required to take care of these.
+ */
+
+int set_memory_uc(unsigned long addr, int numpages);
+int set_memory_wc(unsigned long addr, int numpages);
+int set_memory_wb(unsigned long addr, int numpages);
+int set_memory_iwb(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_np(unsigned long addr, int numpages);
+int set_memory_4k(unsigned long addr, int numpages);
+
+int set_memory_array_uc(unsigned long *addr, int addrinarray);
+int set_memory_array_wc(unsigned long *addr, int addrinarray);
+int set_memory_array_wb(unsigned long *addr, int addrinarray);
+int set_memory_array_iwb(unsigned long *addr, int addrinarray);
+
+int set_pages_array_uc(struct page **pages, int addrinarray);
+int set_pages_array_wc(struct page **pages, int addrinarray);
+int set_pages_array_wb(struct page **pages, int addrinarray);
+int set_pages_array_iwb(struct page **pages, int addrinarray);
+
#endif
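
As a usage illustration for the API declared above: a minimal, hypothetical
sketch in which a driver allocates one page, write-protects it, and restores
write access before freeing it. The function names protect_table() and
release_table() are invented for this example and are not part of the patch.

	#include <linux/gfp.h>
	#include <asm/cacheflush.h>

	static unsigned long table;

	static int protect_table(void)
	{
		int err;

		/* set_memory_* operates on page-aligned kernel linear
		 * addresses, so a page allocator address is suitable. */
		table = __get_free_page(GFP_KERNEL);
		if (!table)
			return -ENOMEM;

		/* Make the page read-only; the implementation flushes
		 * TLBs and caches as needed. */
		err = set_memory_ro(table, 1);
		if (err) {
			free_page(table);
			return err;
		}
		return 0;
	}

	static void release_table(void)
	{
		/* Default attributes are not restored when a page is
		 * freed, so switch back to read-write first. */
		set_memory_rw(table, 1);
		free_page(table);
	}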
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index a485ac3c8696..899bcc35897f 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -199,6 +199,8 @@ typedef struct page *pgtable_t;
extern int pfn_valid(unsigned long);
#endif
+extern phys_addr_t lowmem_end_addr;
+
#include <asm/memory.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index ab08cd74e7d3..64281a650b32 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -257,7 +257,9 @@ extern struct page *empty_zero_page;
#define ZERO_PAGE(vaddr) (empty_zero_page)
#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
+#define pmd_pfn(pmd) ((pmd_val(pmd) & SECTION_MASK) >> PAGE_SHIFT)
#define pfn_pte(pfn,prot) (__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pte_pgprot(pte) ((pgprot_t)(pte_val(pte) & ~PAGE_MASK))
#define pte_none(pte) (!pte_val(pte))
#define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0)
@@ -366,6 +368,24 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
clean_pmd_entry(pmdp); \
} while (0)
+extern spinlock_t pgd_lock;
+extern struct list_head pgd_list;
+
+pte_t *lookup_address(unsigned long address, unsigned int *level);
+enum {
+ PG_LEVEL_NONE,
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_NUM
+};
+
+#ifdef CONFIG_PROC_FS
+extern void update_page_count(int level, unsigned long pages);
+#else
+static inline void update_page_count(int level, unsigned long pages) { }
+#endif
+
+
static inline pte_t *pmd_page_vaddr(pmd_t pmd)
{
unsigned long ptr;
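
A hedged sketch of how the lookup_address() helper declared above can be
used: it walks the kernel page tables for a virtual address and reports the
mapping level alongside the entry. dump_mapping_level() is a hypothetical
caller, not part of the patch.

	static void dump_mapping_level(unsigned long addr)
	{
		unsigned int level;
		pte_t *pte = lookup_address(addr, &level);

		if (!pte) {
			pr_info("%lx: not mapped\n", addr);
			return;
		}
		/* PG_LEVEL_2M means addr is covered by a section mapping
		 * and the returned pointer is really a pmd_t *. */
		pr_info("%lx: mapped at %s\n", addr,
			level == PG_LEVEL_2M ? "2M section" : "4K page");
	}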
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index d63b6c413758..fb7b513bee27 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -6,7 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
iomap.o
obj-$(CONFIG_MMU) += fault-armv.o flush.o ioremap.o mmap.o \
- pgd.o mmu.o vmregion.o
+ pgd.o mmu.o vmregion.o pageattr.o
ifneq ($(CONFIG_MMU),y)
obj-y += nommu.o
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index fa2637b5479a..7df956a67267 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -561,6 +561,7 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
const struct mem_type *type)
{
pmd_t *pmd = pmd_offset(pgd, addr);
+ unsigned long pages_2m = 0, pages_4k = 0;
/*
* Try a section mapping - end, addr and phys must all be aligned
@@ -579,6 +580,8 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
phys += SECTION_SIZE;
} while (pmd++, addr += SECTION_SIZE, addr != end);
+ pages_2m += (end-addr) >> SECTION_SHIFT;
+
flush_pmd_entry(p);
} else {
/*
@@ -586,6 +589,12 @@ static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
* individual L1 entries.
*/
alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
+ pages_4k += (end-addr) >> PAGE_SHIFT;
+ }
+
+ if ((addr < lowmem_end_addr) && (end < lowmem_end_addr)) {
+ update_page_count(PG_LEVEL_2M, pages_2m);
+ update_page_count(PG_LEVEL_4K, pages_4k);
}
}
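
With this accounting in place, the arch_report_meminfo() hook added in
pageattr.c below exposes the 2M/4K breakdown of the direct mapping through
/proc/meminfo. The figures in this sample are invented for illustration;
only the field names and the kB unit come from the patch:

	$ grep DirectMap /proc/meminfo
	DirectMap4k:       12288 kB
	DirectMap2M:      501760 kB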
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
new file mode 100644
index 000000000000..ba57e5d9ca50
--- /dev/null
+++ b/arch/arm/mm/pageattr.c
@@ -0,0 +1,993 @@
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * Thanks to Ben LaHaise for precious feedback.
+ */
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/pfn.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+
+#ifdef CPA_DEBUG
+#define cpa_debug(x, ...) printk(x, __VA_ARGS__)
+#else
+#define cpa_debug(x, ...)
+#endif
+
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+ unsigned long *vaddr;
+ pgprot_t mask_set;
+ pgprot_t mask_clr;
+ int numpages;
+ int flags;
+ unsigned long pfn;
+ unsigned force_split:1;
+ int curpage;
+ struct page **pages;
+};
+
+/*
+ * Serialize cpa() (for !DEBUG_PAGEALLOC, which uses large identity mappings)
+ * with cpa_lock, so that a CPU with stale large-page TLB entries cannot
+ * change page attributes in parallel while another CPU is splitting a
+ * large page entry and changing its attributes.
+ */
+static DEFINE_SPINLOCK(cpa_lock);
+
+#define CPA_FLUSHTLB 1
+#define CPA_ARRAY 2
+#define CPA_PAGES_ARRAY 4
+
+#ifdef CONFIG_PROC_FS
+static unsigned long direct_pages_count[PG_LEVEL_NUM];
+
+void update_page_count(int level, unsigned long pages)
+{
+ unsigned long flags;
+
+ /* Protect against CPA */
+ spin_lock_irqsave(&pgd_lock, flags);
+ direct_pages_count[level] += pages;
+ spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static void split_page_count(int level)
+{
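+ /* one 2M section entry splits into PTRS_PER_PTE 4K pages */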
+ direct_pages_count[level]--;
+ direct_pages_count[level - 1] += PTRS_PER_PTE;
+}
+
+void arch_report_meminfo(struct seq_file *m)
+{
+ seq_printf(m, "DirectMap4k: %8lu kB\n",
+ direct_pages_count[PG_LEVEL_4K] << 2);
+ seq_printf(m, "DirectMap2M: %8lu kB\n",
+ direct_pages_count[PG_LEVEL_2M] << 11);
+}
+#else
+static inline void split_page_count(int level) { }
+#endif
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+ return addr >= start && addr < end;
+}
+
+static void cpa_flush_range(unsigned long start, int numpages, int cache)
+{
+ unsigned int i, level;
+ unsigned long addr;
+
+ BUG_ON(irqs_disabled());
+ WARN_ON(PAGE_ALIGN(start) != start);
+
+ flush_tlb_kernel_range(start, start + (numpages << PAGE_SHIFT));
+
+ if (!cache)
+ return;
+
+ for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
+ pte_t *pte = lookup_address(addr, &level);
+
+ /*
+ * Only flush present addresses:
+ */
+ if (pte && pte_present(*pte)) {
+ __cpuc_flush_dcache_area((void *) addr, PAGE_SIZE);
+ outer_flush_range(__pa((void *)addr),
+ __pa((void *)addr) + PAGE_SIZE);
+ }
+ }
+}
+
+static void cpa_flush_array(unsigned long *start, int numpages, int cache,
+ int in_flags, struct page **pages)
+{
+ unsigned int i, level;
+
+ BUG_ON(irqs_disabled());
+
+ for (i = 0; i < numpages; i++) {
+ unsigned long addr;
+ pte_t *pte;
+
+ if (in_flags & CPA_PAGES_ARRAY)
+ addr = (unsigned long)page_address(pages[i]);
+ else
+ addr = start[i];
+
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ if (cache) {
+
+ pte = lookup_address(addr, &level);
+
+ /*
+ * Only flush present addresses:
+ */
+ if (pte && pte_present(*pte)) {
+ __cpuc_flush_dcache_area((void *)addr,
+ PAGE_SIZE);
+ outer_flush_range(__pa((void *)addr),
+ __pa((void *)addr) + PAGE_SIZE);
+ }
+ }
+ }
+}
+
+/*
+ * Certain areas of memory require very specific protection flags,
+ * for example the kernel text. Callers don't always get this
+ * right so this function checks and fixes these known static
+ * required protection bits.
+ */
+static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
+ unsigned long pfn)
+{
+ pgprot_t forbidden = __pgprot(0);
+
+ /*
+ * The kernel text needs to be executable for obvious reasons.
+ * This does not cover __inittext, since that is freed later on.
+ */
+ if (within(address, (unsigned long)_text, (unsigned long)_etext))
+ prot |= L_PTE_EXEC;
+
+ /*
+ * The .rodata section needs to be read-only. Using the pfn
+ * catches all aliases.
+ */
+ if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
+ __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
+ pgprot_val(forbidden) |= L_PTE_WRITE;
+
+ prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
+
+ return prot;
+}
+
+static inline pgprot_t pte_to_pmd_pgprot(unsigned long pte,
+ unsigned long ext_prot)
+{
+ pgprot_t ref_prot = 0;
+
+ ref_prot |= PMD_TYPE_SECT | PMD_SECT_XN;
+
+ if (pte & L_PTE_MT_BUFFERABLE)
+ ref_prot |= PMD_SECT_BUFFERABLE;
+
+ if (pte & L_PTE_MT_WRITETHROUGH)
+ ref_prot |= PMD_SECT_CACHEABLE;
+
+ if (pte & L_PTE_SHARED)
+ ref_prot |= PMD_SECT_S;
+
+ if (pte & L_PTE_EXEC)
+ ref_prot &= ~PMD_SECT_XN;
+
+ if (pte & L_PTE_WRITE)
+ ref_prot |= PMD_SECT_AP_WRITE;
+
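+ /*
+ * The hardware AP/APX/TEX/nG fields sit 6 bits higher in section
+ * descriptors than in small-page descriptors, hence the << 6 below.
+ */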
+ ref_prot |= (ext_prot & (PTE_EXT_AP0 | PTE_EXT_AP1 | PTE_EXT_APX |
+ PTE_EXT_NG | (7 << 6))) << 6;
+
+ return ref_prot;
+}
+
+static inline pgprot_t pmd_to_pte_pgprot(unsigned long pmd,
+ unsigned long *ext_prot)
+{
+ pgprot_t ref_prot = 0;
+
+ ref_prot |= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_EXEC;
+
+ if (pmd & PMD_SECT_BUFFERABLE)
+ ref_prot |= L_PTE_MT_BUFFERABLE;
+
+ if (pmd & PMD_SECT_CACHEABLE)
+ ref_prot |= L_PTE_MT_WRITETHROUGH;
+
+ if (pmd & PMD_SECT_S)
+ ref_prot |= L_PTE_SHARED;
+
+ if (pmd & PMD_SECT_XN)
+ ref_prot &= ~L_PTE_EXEC;
+
+ if (pmd & PMD_SECT_AP_WRITE)
+ ref_prot |= L_PTE_WRITE;
+
+ /* AP/APX/TEX bits */
+ *ext_prot = (pmd & (PMD_SECT_AP_WRITE | PMD_SECT_AP_READ |
+ PMD_SECT_APX | PMD_SECT_nG | (7 << 12))) >> 6;
+
+ return ref_prot;
+}
+
+/*
+ * Look up the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: we return the pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexistent mapping.
+ */
+pte_t *lookup_address(unsigned long address, unsigned int *level)
+{
+ pgd_t *pgd = pgd_offset_k(address);
+ pte_t *pte;
+ pmd_t *pmd;
+
+ /* pmds are folded into pgds on ARM */
+ *level = PG_LEVEL_NONE;
+
+ if (pgd == NULL || pgd_none(*pgd))
+ return NULL;
+
+ pmd = pmd_offset(pgd, address);
+
+ if (pmd == NULL || pmd_none(*pmd) || !pmd_present(*pmd))
+ return NULL;
+
+ if (((pmd_val(*pmd) & (PMD_TYPE_SECT | PMD_SECT_SUPER))
+ == (PMD_TYPE_SECT | PMD_SECT_SUPER)) || !pmd_present(*pmd)) {
+
+ return NULL;
+ } else if (pmd_val(*pmd) & PMD_TYPE_SECT) {
+
+ *level = PG_LEVEL_2M;
+ return (pte_t *)pmd;
+ }
+
+ pte = pte_offset_kernel(pmd, address);
+
+ if ((pte == NULL) || pte_none(*pte))
+ return NULL;
+
+ *level = PG_LEVEL_4K;
+
+ return pte;
+}
+EXPORT_SYMBOL_GPL(lookup_address);
+
+/*
+ * Set the new pmd in all the pgds we know about:
+ */
+static void __set_pmd_pte(pmd_t *pmd, unsigned long address, pte_t *pte)
+{
+ struct page *page;
+
+ cpa_debug("__set_pmd_pte %x %x %x\n", pmd, pte, *pte);
+
+ /* change init_mm */
+ pmd_populate_kernel(&init_mm, pmd, pte);
+
+ /* change entry in all the pgd's */
+ list_for_each_entry(page, &pgd_list, lru) {
+ cpa_debug("list %x %x %x\n", (unsigned long)page,
+ (unsigned long)pgd_index(address), address);
+ pmd = pmd_offset(((pgd_t *)page_address(page)) +
+ pgd_index(address), address);
+ pmd_populate_kernel(NULL, pmd, pte);
+ }
+}
+
+static int
+try_preserve_large_page(pte_t *kpte, unsigned long address,
+ struct cpa_data *cpa)
+{
+ unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+ pte_t old_pte, *tmp;
+ pgprot_t old_prot, new_prot, ext_prot, req_prot;
+ int i, do_split = 1;
+ unsigned int level;
+
+ if (cpa->force_split)
+ return 1;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ /*
+ * Check for races, another CPU might have split this page
+ * up already:
+ */
+ tmp = lookup_address(address, &level);
+ if (tmp != kpte)
+ goto out_unlock;
+
+ switch (level) {
+
+ case PG_LEVEL_2M:
+ psize = PMD_SIZE;
+ pmask = PMD_MASK;
+ break;
+
+ default:
+ do_split = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * Calculate the number of pages, which fit into this large
+ * page starting at address:
+ */
+ nextpage_addr = (address + psize) & pmask;
+ numpages = (nextpage_addr - address) >> PAGE_SHIFT;
+ if (numpages < cpa->numpages)
+ cpa->numpages = numpages;
+
+ old_prot = new_prot = req_prot = pmd_to_pte_pgprot(pmd_val(*kpte),
+ &ext_prot);
+
+ pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+ pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
+
+ /*
+ * old_pte points to the large page base address. So we need
+ * to add the offset of the virtual address:
+ */
+ pfn = pmd_pfn(*kpte) + ((address & (psize - 1)) >> PAGE_SHIFT);
+ cpa->pfn = pfn;
+
+ new_prot = static_protections(req_prot, address, pfn);
+
+ /*
+ * We need to check the full range, whether
+ * static_protection() requires a different pgprot for one of
+ * the pages in the range we try to preserve:
+ */
+ addr = address & pmask;
+ pfn = pmd_pfn(*kpte);
+ for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
+ pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
+
+ if (pgprot_val(chk_prot) != pgprot_val(new_prot))
+ goto out_unlock;
+ }
+
+ /*
+ * If there are no changes, return. cpa->numpages has been updated
+ * above:
+ */
+ if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
+ do_split = 0;
+ goto out_unlock;
+ }
+
+ /*
+ * convert prot to pmd format
+ */
+ new_prot = pte_to_pmd_pgprot(new_prot, ext_prot);
+
+ /*
+ * We need to change the attributes. Check, whether we can
+ * change the large page in one go. We request a split, when
+ * the address is not aligned and the number of pages is
+ * smaller than the number of pages in the large page. Note
+ * that we limited the number of possible pages already to
+ * the number of pages in the large page.
+ */
+ if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+ /*
+ * The address is aligned and the number of pages
+ * covers the full page.
+ */
+ phys_addr_t phys = __pfn_to_phys(pmd_pfn(*kpte));
+ pmd_t *p = (pmd_t *)kpte;
+
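+ /* a 2M large page is a pair of 1M section entries; rewrite both */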
+ *kpte++ = __pmd(phys | new_prot);
+ *kpte = __pmd((phys + SECTION_SIZE) | new_prot);
+ flush_pmd_entry(p);
+ cpa->flags |= CPA_FLUSHTLB;
+ do_split = 0;
+ cpa_debug("preserving page at phys %x pmd %x\n", phys, p);
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ return do_split;
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+ unsigned long flags, pfn, pfninc = 1;
+ unsigned int i, level;
+ pte_t *pbase, *tmp;
+ pgprot_t ref_prot = 0, ext_prot = 0;
+ int ret = 0;
+
+ pbase = pte_alloc_one_kernel(&init_mm, address);
+ if (!pbase)
+ return -ENOMEM;
+
+ cpa_debug("split_large_page %x PMD %x new pte @ %x\n", address,
+ *kpte, pbase);
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ /*
+ * Check for races, another CPU might have split this page
+ * up for us already:
+ */
+ tmp = lookup_address(address, &level);
+ if (tmp != kpte)
+ goto out_unlock;
+
+ /*
+ * we only split 2MB entries for now
+ */
+ if (level != PG_LEVEL_2M) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ ref_prot = pmd_to_pte_pgprot(pmd_val(*kpte), &ext_prot);
+
+ /*
+ * Get the target pfn from the original entry:
+ */
+ pfn = pmd_pfn(*kpte);
+ for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
+ set_pte_ext(&pbase[i], pfn_pte(pfn, ref_prot), ext_prot);
+
+ if (address >= (unsigned long)__va(0) &&
+ address < (unsigned long)__va(lowmem_end_addr))
+ split_page_count(level);
+
+ /*
+ * Install the new, split up pagetable.
+ */
+ __set_pmd_pte((pmd_t *)kpte, address, pbase);
+
+ pbase = NULL;
+
+out_unlock:
+ /*
+ * If we dropped out via the lookup_address check under
+ * pgd_lock then stick the page back into the pool:
+ */
+ if (pbase)
+ pte_free_kernel(&init_mm, pbase);
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ return ret;
+}
+
+static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
+ int primary)
+{
+ /*
+ * Ignore all non primary paths.
+ */
+ if (!primary)
+ return 0;
+
+ /*
+ * Ignore the NULL PTE for kernel identity mapping, as it is expected
+ * to have holes.
+ * Also set numpages to '1' indicating that we processed cpa req for
+ * one virtual address page and its pfn. TBD: numpages can be set based
+ * on the initial value and the level returned by lookup_address().
+ */
+ if (within(vaddr, PAGE_OFFSET,
+ PAGE_OFFSET + lowmem_end_addr)) {
+ cpa->numpages = 1;
+ cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
+ return 0;
+ } else {
+ WARN(1, KERN_WARNING "CPA: called for zero pte. "
+ "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
+ *cpa->vaddr);
+
+ return -EFAULT;
+ }
+}
+
+static int __change_page_attr(struct cpa_data *cpa, int primary)
+{
+ unsigned long address;
+ int do_split, err;
+ unsigned int level;
+ pte_t *kpte, old_pte;
+
+ if (cpa->flags & CPA_PAGES_ARRAY) {
+ struct page *page = cpa->pages[cpa->curpage];
+
+ if (unlikely(PageHighMem(page)))
+ return 0;
+
+ address = (unsigned long)page_address(page);
+
+ } else if (cpa->flags & CPA_ARRAY)
+ address = cpa->vaddr[cpa->curpage];
+ else
+ address = *cpa->vaddr;
+
+repeat:
+ kpte = lookup_address(address, &level);
+ if (!kpte)
+ return __cpa_process_fault(cpa, address, primary);
+
+ old_pte = *kpte;
+ if (!pte_val(old_pte))
+ return __cpa_process_fault(cpa, address, primary);
+
+ if (level == PG_LEVEL_4K) {
+ pte_t new_pte;
+ pgprot_t new_prot = pte_pgprot(old_pte);
+ unsigned long pfn = pte_pfn(old_pte);
+
+ pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+ pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+
+ new_prot = static_protections(new_prot, address, pfn);
+
+ /*
+ * We need to keep the pfn from the existing PTE,
+ * after all we're only going to change its attributes,
+ * not the memory it points to.
+ */
+ new_pte = pfn_pte(pfn, new_prot);
+ cpa->pfn = pfn;
+
+ /*
+ * Do we really change anything ?
+ */
+ if (pte_val(old_pte) != pte_val(new_pte)) {
+ set_pte_ext(kpte, new_pte, 0);
+ /*
+ * FIXME : is this needed on arm?
+ * set_pte_ext already does a flush
+ */
+ cpa->flags |= CPA_FLUSHTLB;
+ }
+ cpa->numpages = 1;
+ return 0;
+ }
+
+ /*
+ * Check, whether we can keep the large page intact
+ * and just change the pte:
+ */
+ do_split = try_preserve_large_page(kpte, address, cpa);
+
+ /*
+ * When the range fits into the existing large page,
+ * return. cpa->numpages and cpa->flags have been updated in
+ * try_preserve_large_page():
+ */
+ if (do_split <= 0)
+ return do_split;
+
+ /*
+ * We have to split the large page:
+ */
+ err = split_large_page(kpte, address);
+
+ if (!err) {
+ /*
+ * Do a global flush tlb after splitting the large page
+ * and before we do the actual change page attribute in the PTE.
+ *
+ * Without this, we violate the TLB application note, which says
+ * "The TLBs may contain both ordinary and large-page
+ * translations for a 4-KByte range of linear addresses. This
+ * may occur if software modifies the paging structures so that
+ * the page size used for the address range changes. If the two
+ * translations differ with respect to page frame or attributes
+ * (e.g., permissions), processor behavior is undefined and may
+ * be implementation-specific."
+ *
+ * We do this global TLB flush inside the cpa_lock, so that no
+ * other CPU with stale TLB entries can change, in parallel, the
+ * attributes of a page that falls into the just-split large page
+ * entry.
+ */
+ flush_tlb_all();
+ goto repeat;
+ }
+
+ return err;
+}
+
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
+
+static int cpa_process_alias(struct cpa_data *cpa)
+{
+ struct cpa_data alias_cpa;
+ unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+ unsigned long vaddr;
+ int ret;
+
+ if (cpa->pfn >= (lowmem_end_addr >> PAGE_SHIFT))
+ return 0;
+
+ /*
+ * No need to redo, when the primary call touched the direct
+ * mapping already:
+ */
+ if (cpa->flags & CPA_PAGES_ARRAY) {
+ struct page *page = cpa->pages[cpa->curpage];
+ if (unlikely(PageHighMem(page)))
+ return 0;
+ vaddr = (unsigned long)page_address(page);
+ } else if (cpa->flags & CPA_ARRAY)
+ vaddr = cpa->vaddr[cpa->curpage];
+ else
+ vaddr = *cpa->vaddr;
+
+ if (!(within(vaddr, PAGE_OFFSET,
+ PAGE_OFFSET + lowmem_end_addr))) {
+
+ alias_cpa = *cpa;
+ alias_cpa.vaddr = &laddr;
+ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+
+ ret = __change_page_attr_set_clr(&alias_cpa, 0);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
+{
+ int ret, numpages = cpa->numpages;
+
+ while (numpages) {
+ /*
+ * Store the remaining nr of pages for the large page
+ * preservation check.
+ */
+ cpa->numpages = numpages;
+ /* for array changes, we can't use large page */
+ if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
+ cpa->numpages = 1;
+
+ if (!debug_pagealloc)
+ spin_lock(&cpa_lock);
+ ret = __change_page_attr(cpa, checkalias);
+ if (!debug_pagealloc)
+ spin_unlock(&cpa_lock);
+ if (ret)
+ return ret;
+
+ if (checkalias) {
+ ret = cpa_process_alias(cpa);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Adjust the number of pages with the result of the
+ * CPA operation. Either a large page has been
+ * preserved or a single page update happened.
+ */
+ BUG_ON(cpa->numpages > numpages);
+ numpages -= cpa->numpages;
+ if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
+ cpa->curpage++;
+ else
+ *cpa->vaddr += cpa->numpages * PAGE_SIZE;
+ }
+ return 0;
+}
+
+static inline int cache_attr(pgprot_t attr)
+{
+ /*
+ * We need to flush the cache for all memory type changes
+ * except when a page is being marked write-back cacheable.
+ */
+ return !((pgprot_val(attr) & L_PTE_MT_MASK) == L_PTE_MT_WRITEBACK);
+}
+
+static int change_page_attr_set_clr(unsigned long *addr, int numpages,
+ pgprot_t mask_set, pgprot_t mask_clr,
+ int force_split, int in_flag,
+ struct page **pages)
+{
+ struct cpa_data cpa;
+ int ret, cache, checkalias;
+ unsigned long baddr = 0;
+
+ if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
+ return 0;
+
+ /* Ensure we are PAGE_SIZE aligned */
+ if (in_flag & CPA_ARRAY) {
+ int i;
+ for (i = 0; i < numpages; i++) {
+ if (addr[i] & ~PAGE_MASK) {
+ addr[i] &= PAGE_MASK;
+ WARN_ON_ONCE(1);
+ }
+ }
+ } else if (!(in_flag & CPA_PAGES_ARRAY)) {
+ /*
+ * in_flag of CPA_PAGES_ARRAY implies it is aligned.
+ * No need to check in that case.
+ */
+ if (*addr & ~PAGE_MASK) {
+ *addr &= PAGE_MASK;
+ /*
+ * People should not be passing in unaligned addresses:
+ */
+ WARN_ON_ONCE(1);
+ }
+ /*
+ * Save address for cache flush. *addr is modified in the call
+ * to __change_page_attr_set_clr() below.
+ */
+ baddr = *addr;
+ }
+
+ /* Must avoid aliasing mappings in the highmem code */
+ kmap_flush_unused();
+
+ vm_unmap_aliases();
+
+ cpa.vaddr = addr;
+ cpa.pages = pages;
+ cpa.numpages = numpages;
+ cpa.mask_set = mask_set;
+ cpa.mask_clr = mask_clr;
+ cpa.flags = 0;
+ cpa.curpage = 0;
+ cpa.force_split = force_split;
+
+ if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
+ cpa.flags |= in_flag;
+
+ /* No alias checking for XN bit modifications */
+ checkalias = (pgprot_val(mask_set) |
+ pgprot_val(mask_clr)) != L_PTE_EXEC;
+
+ ret = __change_page_attr_set_clr(&cpa, checkalias);
+
+ /*
+ * Check whether we really changed something:
+ */
+ if (!(cpa.flags & CPA_FLUSHTLB))
+ goto out;
+
+ cache = cache_attr(mask_set);
+
+ if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
+ cpa_flush_array(addr, numpages, cache,
+ cpa.flags, pages);
+ } else
+ cpa_flush_range(baddr, numpages, cache);
+
+out:
+ return ret;
+}
+
+static inline int change_page_attr_set(unsigned long *addr, int numpages,
+ pgprot_t mask, int array)
+{
+ return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
+ (array ? CPA_ARRAY : 0), NULL);
+}
+
+static inline int change_page_attr_clear(unsigned long *addr, int numpages,
+ pgprot_t mask, int array)
+{
+ return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
+ (array ? CPA_ARRAY : 0), NULL);
+}
+
+static inline int cpa_set_pages_array(struct page **pages, int numpages,
+ pgprot_t mask)
+{
+ return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
+ CPA_PAGES_ARRAY, pages);
+}
+
+static inline int cpa_clear_pages_array(struct page **pages, int numpages,
+ pgprot_t mask)
+{
+ return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
+ CPA_PAGES_ARRAY, pages);
+}
+
+int set_memory_uc(unsigned long addr, int numpages)
+{
+ return change_page_attr_set_clr(&addr, numpages,
+ __pgprot(L_PTE_MT_UNCACHED),
+ __pgprot(L_PTE_MT_MASK), 0, 0, NULL);
+}
+EXPORT_SYMBOL(set_memory_uc);
+
+int _set_memory_array(unsigned long *addr, int addrinarray,
+ unsigned long set, unsigned long clr)
+{
+ return change_page_attr_set_clr(addr, addrinarray, __pgprot(set),
+ __pgprot(clr), 0, CPA_ARRAY, NULL);
+}
+
+int set_memory_array_uc(unsigned long *addr, int addrinarray)
+{
+ return _set_memory_array(addr, addrinarray,
+ L_PTE_MT_UNCACHED, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_memory_array_uc);
+
+int set_memory_array_wc(unsigned long *addr, int addrinarray)
+{
+ return _set_memory_array(addr, addrinarray,
+ L_PTE_MT_BUFFERABLE, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_memory_array_wc);
+
+int set_memory_wc(unsigned long addr, int numpages)
+{
+ int ret;
+
+ ret = change_page_attr_set_clr(&addr, numpages,
+ __pgprot(L_PTE_MT_BUFFERABLE),
+ __pgprot(L_PTE_MT_MASK),
+ 0, 0, NULL);
+ return ret;
+}
+EXPORT_SYMBOL(set_memory_wc);
+
+int set_memory_wb(unsigned long addr, int numpages)
+{
+ return change_page_attr_set_clr(&addr, numpages,
+ __pgprot(L_PTE_MT_WRITEBACK),
+ __pgprot(L_PTE_MT_MASK),
+ 0, 0, NULL);
+}
+EXPORT_SYMBOL(set_memory_wb);
+
+int set_memory_iwb(unsigned long addr, int numpages)
+{
+ return change_page_attr_set_clr(&addr, numpages,
+ __pgprot(L_PTE_MT_INNER_WB),
+ __pgprot(L_PTE_MT_MASK),
+ 0, 0, NULL);
+}
+EXPORT_SYMBOL(set_memory_iwb);
+
+int set_memory_array_wb(unsigned long *addr, int addrinarray)
+{
+ return change_page_attr_set_clr(addr, addrinarray,
+ __pgprot(L_PTE_MT_WRITEBACK),
+ __pgprot(L_PTE_MT_MASK),
+ 0, CPA_ARRAY, NULL);
+}
+EXPORT_SYMBOL(set_memory_array_wb);
+
+int set_memory_array_iwb(unsigned long *addr, int addrinarray)
+{
+ return change_page_attr_set_clr(addr, addrinarray,
+ __pgprot(L_PTE_MT_INNER_WB),
+ __pgprot(L_PTE_MT_MASK),
+ 0, CPA_ARRAY, NULL);
+}
+EXPORT_SYMBOL(set_memory_array_iwb);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(&addr, numpages, __pgprot(L_PTE_EXEC), 0);
+}
+EXPORT_SYMBOL(set_memory_x);
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+ return change_page_attr_clear(&addr, numpages,
+ __pgprot(L_PTE_EXEC), 0);
+}
+EXPORT_SYMBOL(set_memory_nx);
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+ return change_page_attr_clear(&addr, numpages,
+ __pgprot(L_PTE_WRITE), 0);
+}
+EXPORT_SYMBOL_GPL(set_memory_ro);
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(&addr, numpages,
+ __pgprot(L_PTE_WRITE), 0);
+}
+EXPORT_SYMBOL_GPL(set_memory_rw);
+
+int set_memory_np(unsigned long addr, int numpages)
+{
+ return change_page_attr_clear(&addr, numpages,
+ __pgprot(L_PTE_PRESENT), 0);
+}
+
+int set_memory_4k(unsigned long addr, int numpages)
+{
+ return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+ __pgprot(0), 1, 0, NULL);
+}
+
+static int _set_pages_array(struct page **pages, int addrinarray,
+ unsigned long set, unsigned long clr)
+{
+ return change_page_attr_set_clr(NULL, addrinarray,
+ __pgprot(set),
+ __pgprot(clr),
+ 0, CPA_PAGES_ARRAY, pages);
+}
+
+int set_pages_array_uc(struct page **pages, int addrinarray)
+{
+ return _set_pages_array(pages, addrinarray,
+ L_PTE_MT_UNCACHED, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_uc);
+
+int set_pages_array_wc(struct page **pages, int addrinarray)
+{
+ return _set_pages_array(pages, addrinarray, L_PTE_MT_BUFFERABLE,
+ L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_wc);
+
+int set_pages_array_wb(struct page **pages, int addrinarray)
+{
+ return _set_pages_array(pages, addrinarray,
+ L_PTE_MT_WRITEBACK, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_wb);
+
+int set_pages_array_iwb(struct page **pages, int addrinarray)
+{
+ return _set_pages_array(pages, addrinarray,
+ L_PTE_MT_INNER_WB, L_PTE_MT_MASK);
+}
+EXPORT_SYMBOL(set_pages_array_iwb);
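
To round off the new file, a minimal, hypothetical sketch of the array API
exported above, which batches the TLB and cache maintenance for many
scattered pages into a single change_page_attr_set_clr() call.
map_buffer_wc() and unmap_buffer() are invented names for this example.

	#include <linux/gfp.h>
	#include <asm/cacheflush.h>

	#define NPAGES 16

	static struct page *pages[NPAGES];

	static int map_buffer_wc(void)
	{
		int i, err;

		for (i = 0; i < NPAGES; i++) {
			pages[i] = alloc_page(GFP_KERNEL);
			if (!pages[i])
				goto out_free;
		}

		/* One call marks all pages write-combining. */
		err = set_pages_array_wc(pages, NPAGES);
		if (err) {
			while (i--)
				__free_page(pages[i]);
			return err;
		}
		return 0;

	out_free:
		while (i--)
			__free_page(pages[i]);
		return -ENOMEM;
	}

	static void unmap_buffer(void)
	{
		int i;

		/* Attributes are not restored on free; switch the pages
		 * back to write-back before returning them. */
		set_pages_array_wb(pages, NPAGES);
		for (i = 0; i < NPAGES; i++)
			__free_page(pages[i]);
	}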
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index ea92009b91df..536bcf393d15 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -19,6 +19,23 @@
#define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+ struct page *page = virt_to_page(pgd);
+
+ list_add(&page->lru, &pgd_list);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+ struct page *page = virt_to_page(pgd);
+
+ list_del(&page->lru);
+}
+
/*
* need to get a 16k page for level 1
*/
@@ -27,6 +44,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
pgd_t *new_pgd, *init_pgd;
pmd_t *new_pmd, *init_pmd;
pte_t *new_pte, *init_pte;
+ unsigned long flags;
new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2);
if (!new_pgd)
@@ -34,6 +52,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
memset(new_pgd, 0, FIRST_KERNEL_PGD_NR * sizeof(pgd_t));
+ spin_lock_irqsave(&pgd_lock, flags);
/*
* Copy over the kernel and IO PGD entries
*/
@@ -44,6 +63,10 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
#if !defined(CONFIG_CPU_CACHE_V7) || !defined(CONFIG_SMP)
clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
#endif
+
+ pgd_list_add(new_pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
if (!vectors_high()) {
/*
* On ARM, first page must always be allocated since it
@@ -69,6 +92,9 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
no_pte:
pmd_free(mm, new_pmd);
no_pmd:
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(new_pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
free_pages((unsigned long)new_pgd, 2);
no_pgd:
return NULL;
@@ -78,10 +104,15 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
{
pmd_t *pmd;
pgtable_t pte;
+ unsigned long flags;
if (!pgd)
return;
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
/* pgd is always present and good */
pmd = pmd_off(pgd, 0);
if (pmd_none(*pmd))