author    Gary King <gking@nvidia.com>            2010-08-02 15:55:16 -0700
committer Dan Willemsen <dwillemsen@nvidia.com>   2011-11-30 21:36:01 -0800
commit    87e7e9b170105c5d311f86a45223cc36bd24f3bb (patch)
tree      e57f62a1922b88c62828b5c9be5e59615a3f0630 /arch/arm/mm
parent    a03bc0f71c14b962b27ef64c3420e29ab42ac99d (diff)
[ARM] mm: add page allocator for modifying cache attributes
ARM CPUs with speculative prefetching have undefined behavior when the same
physical page is mapped at two different virtual addresses with conflicting
cache attributes.

Since many recent systems include IOMMU functionality (i.e., remapping of
discontiguous physical pages into a virtually-contiguous address range for
I/O devices), it is desirable to support allocating any available OS memory
for use by the I/O devices. However, since many systems do not support cache
coherency between the CPU and DMA devices, these devices are left with either
using DMA-coherent allocations from the OS (which severely limits the benefit
of an IOMMU) or performing cache maintenance (which can be a severe
performance loss compared to using DMA-coherent memory, particularly on
systems with outer caches).

This change adds an API for allocating pages from the OS with specific cache
maintenance properties and ensures that the kernel's mapping of the page
reflects the desired cache attributes, in line with the ARMv7 architectural
requirements.

Change-Id: If0bd3cfe339b9a9b10fd6d45a748cd5e65931cf0
Signed-off-by: Gary King <gking@nvidia.com>
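For context, a caller of the interface added by this patch might look roughly
like the sketch below. This is not part of the patch: the my_driver_* names,
the buffer size, and the choice of a write-combined attribute are illustrative
assumptions; only arm_attrib_alloc_pages_exact_node() and
arm_attrib_free_pages_exact() come from the patch itself.

	#include <linux/gfp.h>
	#include <linux/mm.h>
	#include <linux/topology.h>
	#include <asm/attrib_alloc.h>
	#include <asm/pgtable.h>

	/* hypothetical buffer size: three pages, not a power-of-two order */
	#define MY_BUF_SIZE	(3 * PAGE_SIZE)

	static struct page *my_buf_pages;

	static int my_driver_alloc_buffer(void)
	{
		/* allocate exactly MY_BUF_SIZE bytes of pages whose kernel
		 * mapping is switched to a write-combined attribute */
		my_buf_pages = arm_attrib_alloc_pages_exact_node(numa_node_id(),
					GFP_KERNEL | __GFP_HIGHMEM,
					MY_BUF_SIZE,
					pgprot_writecombine(pgprot_kernel));
		if (!my_buf_pages)
			return -ENOMEM;

		/* the pages can now be handed to an IOMMU / system MMU for
		 * device access without per-buffer cache maintenance */
		return 0;
	}

	static void my_driver_free_buffer(void)
	{
		/* restores the default kernel cache attributes before the
		 * pages return to the page allocator */
		arm_attrib_free_pages_exact(my_buf_pages, MY_BUF_SIZE);
	}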
Diffstat (limited to 'arch/arm/mm')
-rw-r--r--  arch/arm/mm/Kconfig          23
-rw-r--r--  arch/arm/mm/Makefile          2
-rw-r--r--  arch/arm/mm/attrib_alloc.c  126
3 files changed, 151 insertions(+), 0 deletions(-)
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 88633fe01a5d..122d88e073d6 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -864,6 +864,29 @@ config ARM_L1_CACHE_SHIFT
default 6 if ARM_L1_CACHE_SHIFT_6
default 5
+config ARM_ATTRIB_ALLOCATOR
+ bool "Support custom cache attribute allocations in low memory"
+ select ARCH_LOWMEM_IN_PTES if (CPU_V7)
+ depends on MMU && !CPU_CACHE_VIVT
+ help
+ Historically, the kernel has only reserved a small region
+ of physical memory for uncached access, and relied on
+ explicit cache maintenance for ensuring coherency between
+ the CPU and DMA.
+
+ However, many recent systems support mapping discontiguous
+ physical pages into contiguous DMA addresses (so-called
+ system MMUs). For some DMA clients (notably graphics and
+ multimedia engines), performing explicit cache maintenance
+ between CPU and DMA mappings can be prohibitively expensive,
+ and since ARMv7, mapping the same physical page with different
+ cache attributes is disallowed and behaves unpredictably.
+
+ Say 'Y' here to include page allocation support with explicit
+ cache attributes; on ARMv7 systems this will also force the
+ kernel's low memory to be mapped using page tables rather
+ than sections.
+
config ARM_DMA_MEM_BUFFERABLE
bool "Use non-cacheable memory for DMA" if (CPU_V6 || CPU_V6K) && !CPU_V7
depends on !(MACH_REALVIEW_PB1176 || REALVIEW_EB_ARM11MP || \
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index bca7e61928c7..2766c8b1a30c 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -12,6 +12,8 @@ ifneq ($(CONFIG_MMU),y)
obj-y += nommu.o
endif
+obj-$(CONFIG_ARM_ATTRIB_ALLOCATOR) += attrib_alloc.o
+
obj-$(CONFIG_MODULES) += proc-syms.o
obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o
diff --git a/arch/arm/mm/attrib_alloc.c b/arch/arm/mm/attrib_alloc.c
new file mode 100644
index 000000000000..0966a8b8bf41
--- /dev/null
+++ b/arch/arm/mm/attrib_alloc.c
@@ -0,0 +1,126 @@
+/*
+ * arch/arm/mm/attrib_alloc.c
+ *
+ * Page allocator with custom cache attributes
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/page-flags.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+#include <asm/outercache.h>
+#include <asm/attrib_alloc.h>
+#include "mm.h"
+
+static void update_kmap_pte(struct page *page, pgprot_t prot)
+{
+#ifdef CONFIG_HIGHMEM
+ unsigned long addr;
+ pte_t *pte;
+
+ addr = (unsigned long)kmap_high_get(page);
+ BUG_ON(!PageHighMem(page) || addr >= FIXADDR_START);
+ if (!addr)
+ return;
+
+ pte = &pkmap_page_table[PKMAP_NR(addr)];
+ set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+ kunmap_high(page);
+#endif
+}
+
+static void update_pte(struct page *page, pgprot_t prot)
+{
+#ifdef CONFIG_ARCH_LOWMEM_IN_PTES
+ unsigned long addr = (unsigned long)page_address(page);
+ pgd_t *pgd = pgd_offset_k(addr);
+ pmd_t *pmd = pmd_offset(pgd, addr);
+ pte_t *pte;
+
+ BUG_ON(pmd_none(*pmd));
+ pte = pte_offset_kernel(pmd, addr);
+ set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+#endif
+}
+
+void arm_attrib_free_pages_exact(struct page *page, size_t size)
+{
+ struct page *p, *e;
+
+ size = PAGE_ALIGN(size);
+ e = page + (size >> PAGE_SHIFT);
+
+ /* reset the page's mappings back to the standard kernel mappings
+ * before returning it to the page allocator */
+ for (p = page; p < e; p++) {
+ if (PageHighMem(p))
+ update_kmap_pte(p, pgprot_kernel);
+ else
+ update_pte(p, pgprot_kernel);
+
+ __free_page(p);
+ }
+}
+
+struct page *arm_attrib_alloc_pages_exact_node(int nid, gfp_t gfp,
+ size_t size, pgprot_t prot)
+{
+ struct page *page, *p, *e;
+ unsigned int order;
+ unsigned long base;
+
+ size = PAGE_ALIGN(size);
+ order = get_order(size);
+ page = alloc_pages_node(nid, gfp, order);
+
+ if (!page)
+ return NULL;
+
+ split_page(page, order);
+
+ e = page + (1 << order);
+ for (p = page + (size >> PAGE_SHIFT); p < e; p++)
+ __free_page(p);
+
+ e = page + (size >> PAGE_SHIFT);
+
+ for (p = page; p < e; p++) {
+ __flush_dcache_page(page_mapping(p), p);
+
+ /* even though a freshly-allocated highmem page shouldn't
+ * be mapped, because the kmaps are flushed lazily, it
+ * is possible that a mapping from an old kmap_high call
+ * is still present, and its cache attributes need to
+ * be updated to match the new expectations */
+ if (PageHighMem(p))
+ update_kmap_pte(p, prot);
+ else
+ update_pte(p, prot);
+ }
+ base = page_to_phys(page);
+ outer_flush_range(base, base + size);
+ return page;
+}
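The allocation path above follows the familiar exact-size pattern also used by
alloc_pages_exact(): allocate the covering power-of-two order, split_page()
the block into independent order-0 pages, and immediately free the tail beyond
the requested size. A minimal generic sketch of just that pattern, without the
cache-attribute and flush handling of the patch (the example_* name is
illustrative):

	#include <linux/gfp.h>
	#include <linux/mm.h>

	/* allocate exactly 'size' bytes (rounded up to whole pages) as
	 * independent order-0 pages */
	static struct page *example_alloc_pages_exact(gfp_t gfp, size_t size)
	{
		unsigned int order = get_order(size);
		struct page *page = alloc_pages(gfp, order);
		struct page *p, *end;

		if (!page)
			return NULL;

		/* turn the order-N block into 1 << N order-0 pages so the
		 * unused tail can be freed one page at a time */
		split_page(page, order);

		end = page + (1 << order);
		for (p = page + (PAGE_ALIGN(size) >> PAGE_SHIFT); p < end; p++)
			__free_page(p);

		return page;
	}

Splitting the allocation this way is what allows arm_attrib_free_pages_exact()
to walk the range page by page, restore the default kernel attributes on each
page, and return each one to the allocator with __free_page().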