author    Manoj Chourasia <mchourasia@nvidia.com>    2012-06-04 17:25:43 +0530
committer Rohan Somvanshi <rsomvanshi@nvidia.com>    2012-06-13 02:48:30 -0700
commit    017d8bd2c4bd03afe04721476dd26388a4bfe7f6 (patch)
tree      7b09ed282288a302c0a9f43cd43115bc03989ff6 /arch
parent    6a42ac50db6ff2832ff616f586fe7217b885df14 (diff)
Avoid aliasing mappings in DMA coherent allocator
Avoid multiple mappings with the DMA coherent/writecombine allocator by pre-allocating the mappings and removing that memory from the system memory mapping. (See previous discussions on linux-arm-kernel as to why this is bad.)

NB1: By default, we preallocate 2MB for DMA coherent memory and 2MB for write combine memory, rather than 1MB for each, in case 1MB is not sufficient for existing platform usage. Platforms have the option of shrinking this down to 1MB DMA / 1MB WC (or even 2MB DMA / 0MB WC) if they so wish. The DMA memory must be a multiple of 1MB, the write combine memory must also be a multiple of 1MB, and the two together must be a multiple of 2MB.

NB2: On ARMv6/7, where we use 'normal uncacheable' memory for both DMA and WC, the two pools are combined into one, as was the case with the previous implementation.

The down side to this change is that the memory is permanently set aside for DMA purposes, but I believe that to be unavoidable if we are to avoid the possibility of the cache getting in the way on VIPT CPUs. This removes the last known offender (at this time) from the kernel.

Given that DMA memory is fully coherent with this patch, cache invalidation/clean is not required, so we skip cache maintenance for memory managed by the DMA layer. The bus address -> virtual address conversion normally used in the calling path, together with the fact that we remove the kernel static mapping corresponding to the DMA buffers, would otherwise lead to exceptions.

bug 876019
bug 965047
bug 987589

Change-Id: I72beb386605aafe1a301494a95a67d094ea6b2e4
Signed-off-by: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Manoj Chourasia <mchourasia@nvidia.com>
Reviewed-on: http://git-master/r/106212
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Varun Wadekar <vwadekar@nvidia.com>
Reviewed-by: Krishna Reddy <vdumpa@nvidia.com>
Reviewed-by: Venkat Moganty <vmoganty@nvidia.com>
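As an illustration of the sizing rule in NB1, here is a minimal sketch of how a platform could shrink the pools; the <mach/memory.h> location and the 1MB values are assumptions for illustration only, not part of this change (the defaults remain 2MB + 2MB):

/*
 * Hypothetical platform override, e.g. in <mach/memory.h>: each pool must
 * be a multiple of 1MB (the section size) and the two together a multiple
 * of 2MB, matching the compile-time checks added in dma-na-mapping.c.
 * memory.h only defines these when the platform has not.
 */
#define CONSISTENT_DMA_SIZE	SZ_1M	/* DMA coherent pool */
#define CONSISTENT_WC_SIZE	SZ_1M	/* writecombine pool  */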
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/include/asm/dma-mapping.h  |  36
-rw-r--r--  arch/arm/include/asm/mach/map.h     |   2
-rw-r--r--  arch/arm/include/asm/memory.h       |   7
-rw-r--r--  arch/arm/mm/Kconfig                 |  11
-rw-r--r--  arch/arm/mm/Makefile                |   8
-rw-r--r--  arch/arm/mm/dma-na-mapping.c        | 748
-rw-r--r--  arch/arm/mm/init.c                  |   3
-rw-r--r--  arch/arm/mm/mm.h                    |   5
-rw-r--r--  arch/arm/mm/mmu.c                   |  36
9 files changed, 855 insertions(+), 1 deletion(-)
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 7a21d0bf7134..19e3c8c77f01 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -75,6 +75,27 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
* Private support functions: these are not part of the API and are
* liable to change. Drivers must not use these.
*/
+#if defined(CONFIG_NON_ALIASED_COHERENT_MEM)
+static inline void __dma_single_cpu_to_dev(struct device *dev,
+ const void *kaddr, size_t size, enum dma_data_direction dir)
+{
+ extern void ___dma_single_cpu_to_dev(struct device *dev,
+ const void *, size_t, enum dma_data_direction);
+
+ if (!arch_is_coherent())
+ ___dma_single_cpu_to_dev(dev, kaddr, size, dir);
+}
+
+static inline void __dma_single_dev_to_cpu(struct device *dev,
+ const void *kaddr, size_t size, enum dma_data_direction dir)
+{
+ extern void ___dma_single_dev_to_cpu(struct device *dev,
+ const void *, size_t, enum dma_data_direction);
+
+ if (!arch_is_coherent())
+ ___dma_single_dev_to_cpu(dev, kaddr, size, dir);
+}
+#else
static inline void __dma_single_cpu_to_dev(const void *kaddr, size_t size,
enum dma_data_direction dir)
{
@@ -94,6 +115,7 @@ static inline void __dma_single_dev_to_cpu(const void *kaddr, size_t size,
if (!arch_is_coherent())
___dma_single_dev_to_cpu(kaddr, size, dir);
}
+#endif
static inline void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
size_t size, enum dma_data_direction dir)
@@ -199,8 +221,12 @@ int dma_mmap_coherent(struct device *, struct vm_area_struct *,
extern void *dma_alloc_writecombine(struct device *, size_t, dma_addr_t *,
gfp_t);
+#if defined(CONFIG_NON_ALIASED_COHERENT_MEM)
+extern void dma_free_writecombine(struct device *, size_t, void *, dma_addr_t);
+#else
#define dma_free_writecombine(dev,size,cpu_addr,handle) \
dma_free_coherent(dev,size,cpu_addr,handle)
+#endif
int dma_mmap_writecombine(struct device *, struct vm_area_struct *,
void *, dma_addr_t, size_t);
@@ -421,7 +447,12 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev,
if (!dmabounce_sync_for_cpu(dev, handle, offset, size, dir))
return;
+#if defined(CONFIG_NON_ALIASED_COHERENT_MEM)
+ __dma_single_dev_to_cpu(dev, dma_to_virt(dev, handle) + offset,
+ size, dir);
+#else
__dma_single_dev_to_cpu(dma_to_virt(dev, handle) + offset, size, dir);
+#endif
}
static inline void dma_sync_single_range_for_device(struct device *dev,
@@ -435,7 +466,12 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
if (!dmabounce_sync_for_device(dev, handle, offset, size, dir))
return;
+#if defined(CONFIG_NON_ALIASED_COHERENT_MEM)
+ __dma_single_cpu_to_dev(dev, dma_to_virt(dev, handle) + offset,
+ size, dir);
+#else
__dma_single_cpu_to_dev(dma_to_virt(dev, handle) + offset, size, dir);
+#endif
}
static inline void dma_sync_single_for_cpu(struct device *dev,
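Driver-facing usage is unchanged by the header rework above: only the internal __dma_single_* helpers gain a struct device argument so they can recognise, and skip cache maintenance for, addresses inside the coherent pools. A hedged sketch of the unchanged caller side, with a hypothetical driver helper:

#include <linux/dma-mapping.h>

/*
 * Hypothetical driver helper: one device-to-memory transfer over a
 * streaming mapping. dma_sync_single_for_cpu() below reaches the
 * reworked __dma_single_dev_to_cpu() internally.
 */
static int demo_receive(struct device *dev, void *buf, size_t len)
{
	dma_addr_t bus = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);

	if (dma_mapping_error(dev, bus))
		return -ENOMEM;

	/* ... program the device with 'bus' and wait for completion ... */

	dma_sync_single_for_cpu(dev, bus, len, DMA_FROM_DEVICE);
	/* the CPU may now read buf */

	dma_unmap_single(dev, bus, len, DMA_FROM_DEVICE);
	return 0;
}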
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index d2fedb5aeb1f..3845215404dd 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -29,6 +29,8 @@ struct map_desc {
#define MT_MEMORY_NONCACHED 11
#define MT_MEMORY_DTCM 12
#define MT_MEMORY_ITCM 13
+#define MT_DMA_COHERENT 14
+#define MT_WC_COHERENT 15
#ifdef CONFIG_MMU
extern void iotable_init(struct map_desc *, int);
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 25669795d80d..cf7c02042f60 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -88,6 +88,13 @@
#define CONSISTENT_END (0xffe00000UL)
#define CONSISTENT_BASE (CONSISTENT_END - CONSISTENT_DMA_SIZE)
+#ifndef CONSISTENT_WC_SIZE
+#define CONSISTENT_WC_SIZE SZ_2M
+#endif
+
+#define CONSISTENT_WC_END CONSISTENT_BASE
+#define CONSISTENT_WC_BASE (CONSISTENT_WC_END - CONSISTENT_WC_SIZE)
+
#else /* CONFIG_MMU */
/*
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index aaea6d487bae..e23ad5ebc330 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -885,6 +885,17 @@ config ARM_DMA_MEM_BUFFERABLE
You are recommended say 'Y' here and debug any affected drivers.
+config NON_ALIASED_COHERENT_MEM
+ bool "Avoid aliasing mappings in DMA coherent allocator" if CPU_V7
+ help
+ Avoid multiple mappings with DMA coherent/writecombine allocator by
+ pre-allocating the mappings, and removing that memory from the
+ system memory mapping.
+
+ This will make sure DMA memory is fully coherent, so the dma_sync
+ cache maintenance APIs are not needed to maintain coherency for
+ the DMA coherent region.
+
config ARCH_HAS_BARRIERS
bool
help
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 47e2e3ba1902..c3ef2961ddff 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -2,9 +2,15 @@
# Makefile for the linux arm-specific parts of the memory manager.
#
-obj-y := dma-mapping.o extable.o fault.o init.o \
+obj-y := extable.o fault.o init.o \
iomap.o
+ifeq ($(CONFIG_NON_ALIASED_COHERENT_MEM),y)
+obj-y += dma-na-mapping.o
+else
+obj-y += dma-mapping.o
+endif
+
obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \
mmap.o pgd.o mmu.o vmregion.o pageattr.o
diff --git a/arch/arm/mm/dma-na-mapping.c b/arch/arm/mm/dma-na-mapping.c
new file mode 100644
index 000000000000..4e5d52dda285
--- /dev/null
+++ b/arch/arm/mm/dma-na-mapping.c
@@ -0,0 +1,748 @@
+/*
+ * linux/arch/arm/mm/dma-mapping.c
+ *
+ * Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * DMA uncached mapping support.
+ */
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+
+#include <asm/memory.h>
+#include <asm/highmem.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/sizes.h>
+#include <asm/mach/map.h>
+
+#include "mm.h"
+
+static u64 get_coherent_dma_mask(struct device *dev)
+{
+ u64 mask = (u64)arm_dma_limit;
+
+ if (dev) {
+ mask = dev->coherent_dma_mask;
+
+ /*
+ * Sanity check the DMA mask - it must be non-zero, and
+ * must be able to be satisfied by a DMA allocation.
+ */
+ if (mask == 0) {
+ dev_warn(dev, "coherent DMA mask is unset\n");
+ return 0;
+ }
+
+ if ((~mask) & (u64)arm_dma_limit) {
+ dev_warn(dev, "coherent DMA mask %#llx is smaller "
+ "than system GFP_DMA mask %#llx\n",
+ mask, (u64)arm_dma_limit);
+ return 0;
+ }
+ }
+
+ return mask;
+}
+
+/*
+ * Allocate a DMA buffer for 'dev' of size 'size' using the
+ * specified gfp mask. Note that 'size' must be page aligned.
+ */
+static struct page *__dma_alloc_buffer(struct device *dev,
+ size_t size, gfp_t gfp)
+{
+ unsigned long order = get_order(size);
+ struct page *page, *p, *e;
+ void *ptr;
+ u64 mask = get_coherent_dma_mask(dev);
+
+#ifdef CONFIG_DMA_API_DEBUG
+ u64 limit = (mask + 1) & ~mask;
+ if (limit && size >= limit) {
+ dev_warn(dev, "coherent allocation too big"
+ "(requested %#x mask %#llx)\n",
+ size, mask);
+ return NULL;
+ }
+#endif
+
+ if (!mask)
+ return NULL;
+
+ if (mask < 0xffffffffULL)
+ gfp |= GFP_DMA;
+
+ page = alloc_pages(gfp, order);
+ if (!page)
+ return NULL;
+
+ /*
+ * Now split the huge page and free the excess pages
+ */
+ split_page(page, order);
+ for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order);
+ p < e; p++)
+ __free_page(p);
+
+ /*
+ * Ensure that the allocated pages are zeroed, and that any data
+ * lurking in the kernel direct-mapped region is invalidated.
+ */
+ ptr = page_address(page);
+ memset(ptr, 0, size);
+ dmac_flush_range(ptr, ptr + size);
+ outer_flush_range(__pa(ptr), __pa(ptr) + size);
+
+ return page;
+}
+
+/*
+ * Free a DMA buffer. 'size' must be page aligned.
+ */
+static void __dma_free_buffer(struct page *page, size_t size)
+{
+ struct page *e = page + (size >> PAGE_SHIFT);
+
+ while (page < e) {
+ __free_page(page);
+ page++;
+ }
+}
+
+#ifdef CONFIG_MMU
+/* Sanity check sizes */
+#if CONSISTENT_DMA_SIZE % SECTION_SIZE
+#error "CONSISTENT_DMA_SIZE must be a multiple of the section size"
+#endif
+#if CONSISTENT_WC_SIZE % SECTION_SIZE
+#error "CONSISTENT_WC_SIZE must be a multiple of the section size"
+#endif
+#if ((CONSISTENT_DMA_SIZE + CONSISTENT_WC_SIZE) % SZ_2M)
+#error "Sum of CONSISTENT_DMA_SIZE and CONSISTENT_WC_SIZE must " \
+ "be multiple of 2MiB"
+#endif
+
+#include "vmregion.h"
+
+struct dma_coherent_area {
+ struct arm_vmregion_head vm;
+ unsigned long pfn;
+ unsigned long pfn_end;
+ unsigned int type;
+ const char *name;
+};
+
+static struct dma_coherent_area coherent_wc_head = {
+ .vm = {
+ .vm_start = CONSISTENT_WC_BASE,
+ .vm_end = CONSISTENT_WC_END,
+ },
+ .pfn = 0,
+ .pfn_end = 0,
+ .type = MT_WC_COHERENT,
+ .name = "WC ",
+};
+
+static struct dma_coherent_area coherent_dma_head = {
+ .vm = {
+ .vm_start = CONSISTENT_BASE,
+ .vm_end = CONSISTENT_END,
+ },
+ .pfn = 0,
+ .pfn_end = 0,
+ .type = MT_DMA_COHERENT,
+ .name = "DMA coherent ",
+};
+
+static struct dma_coherent_area *coherent_areas[2] __initdata = {
+ &coherent_wc_head,
+ &coherent_dma_head
+};
+
+static struct dma_coherent_area *coherent_map[2];
+#define coherent_wc_area coherent_map[0]
+#define coherent_dma_area coherent_map[1]
+
+void dma_coherent_reserve(void)
+{
+ phys_addr_t base;
+ unsigned long size;
+ int can_share, i;
+
+ if (arch_is_coherent())
+ return;
+
+#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
+ /* ARMv6: only when DMA_MEM_BUFFERABLE is enabled */
+ can_share = cpu_architecture() >= CPU_ARCH_ARMv6;
+#else
+ /* ARMv7+: WC and DMA areas have the same properties, so can share */
+ can_share = cpu_architecture() >= CPU_ARCH_ARMv7;
+#endif
+ if (can_share) {
+ coherent_wc_head.name = "DMA coherent/WC ";
+ coherent_wc_head.vm.vm_end = coherent_dma_head.vm.vm_end;
+ coherent_dma_head.vm.vm_start = coherent_dma_head.vm.vm_end;
+ coherent_dma_area = coherent_wc_area = &coherent_wc_head;
+ } else {
+ memcpy(coherent_map, coherent_areas, sizeof(coherent_map));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(coherent_areas); i++) {
+ struct dma_coherent_area *area = coherent_areas[i];
+
+ size = area->vm.vm_end - area->vm.vm_start;
+ if (!size)
+ continue;
+
+ spin_lock_init(&area->vm.vm_lock);
+ INIT_LIST_HEAD(&area->vm.vm_list);
+
+ base = memblock_end_of_DRAM() - size;
+ memblock_free(base, size);
+ memblock_remove(base, size);
+
+ area->pfn = __phys_to_pfn(base);
+ area->pfn_end = __phys_to_pfn(base+size);
+
+ pr_info("DMA: %luMiB %smemory allocated at 0x%08llx phys\n",
+ size / 1048576, area->name, (unsigned long long)base);
+ }
+}
+
+void __init dma_coherent_mapping(void)
+{
+ struct map_desc map[ARRAY_SIZE(coherent_areas)];
+ int nr;
+
+ for (nr = 0; nr < ARRAY_SIZE(map); nr++) {
+ struct dma_coherent_area *area = coherent_areas[nr];
+
+ map[nr].pfn = area->pfn;
+ map[nr].virtual = area->vm.vm_start;
+ map[nr].length = area->vm.vm_end - area->vm.vm_start;
+ map[nr].type = area->type;
+ if (map[nr].length == 0)
+ break;
+ }
+
+ iotable_init(map, nr);
+}
+
+static void *dma_alloc_area(size_t size, unsigned long *pfn, gfp_t gfp,
+ struct dma_coherent_area *area)
+{
+ struct arm_vmregion *c;
+ size_t align;
+ int bit;
+
+ /*
+ * Align the virtual region allocation - maximum alignment is
+ * a section size, minimum is a page size. This helps reduce
+ * fragmentation of the DMA space, and also prevents allocations
+ * smaller than a section from crossing a section boundary.
+ */
+ bit = fls(size - 1);
+ if (bit > SECTION_SHIFT)
+ bit = SECTION_SHIFT;
+ align = 1 << bit;
+
+ /*
+ * Allocate a virtual address in the consistent mapping region.
+ */
+ c = arm_vmregion_alloc(&area->vm, align, size,
+ gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+ if (!c)
+ return NULL;
+
+ memset((void *)c->vm_start, 0, size);
+ *pfn = area->pfn + ((c->vm_start - area->vm.vm_start) >> PAGE_SHIFT);
+ c->vm_pages = pfn_to_page(*pfn);
+
+ return (void *)c->vm_start;
+}
+
+static void dma_free_area(void *cpu_addr, size_t size,
+ struct dma_coherent_area *area)
+{
+ struct arm_vmregion *c;
+
+ c = arm_vmregion_find_remove(&area->vm, (unsigned long)cpu_addr);
+ if (!c) {
+ printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
+ __func__, cpu_addr);
+ dump_stack();
+ return;
+ }
+
+ if ((c->vm_end - c->vm_start) != size) {
+ printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
+ __func__, c->vm_end - c->vm_start, size);
+ dump_stack();
+ size = c->vm_end - c->vm_start;
+ }
+
+ arm_vmregion_free(&area->vm, c);
+}
+
+#define nommu() (0)
+
+#else /* !CONFIG_MMU */
+
+#define dma_alloc_area(size, pfn, gfp, area) ({ *(pfn) = 0; NULL })
+#define dma_free_area(addr, size, area) do { } while (0)
+
+#define nommu() (1)
+#define coherent_wc_area NULL
+#define coherent_dma_area NULL
+
+void dma_coherent_reserve(void)
+{
+}
+
+#endif /* CONFIG_MMU */
+
+static void *
+__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
+ struct dma_coherent_area *area)
+{
+
+ unsigned long pfn;
+ void *ret;
+ /* Following is a work-around (a.k.a. hack) to prevent pages
+ * with __GFP_COMP being passed to split_page() which cannot
+ * handle them. The real problem is that this flag probably
+ * should be 0 on ARM as it is not supported on this
+ * platform--see CONFIG_HUGETLB_PAGE. */
+ gfp &= ~(__GFP_COMP);
+
+ *handle = ~0;
+ size = PAGE_ALIGN(size);
+
+ if (arch_is_coherent() || nommu()) {
+ struct page *page = __dma_alloc_buffer(dev, size, gfp);
+ if (!page)
+ return NULL;
+ pfn = page_to_pfn(page);
+ ret = page_address(page);
+ } else {
+ ret = dma_alloc_area(size, &pfn, gfp, area);
+ }
+
+ if (ret)
+ *handle = pfn_to_dma(dev, pfn);
+
+ return ret;
+}
+
+static void __dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, struct dma_coherent_area *area)
+{
+ size = PAGE_ALIGN(size);
+
+ if (arch_is_coherent() || nommu())
+ __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size);
+ else
+ dma_free_area(cpu_addr, size, area);
+}
+
+/*
+ * Allocate DMA-coherent memory space and return both the kernel remapped
+ * virtual and bus address for that space.
+ */
+void *
+dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp)
+{
+ void *memory;
+
+ if (dma_alloc_from_coherent(dev, size, handle, &memory))
+ return memory;
+
+ return __dma_alloc(dev, size, handle, gfp, coherent_dma_area);
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/*
+ * Allocate a writecombining region, in much the same way as
+ * dma_alloc_coherent above.
+ */
+void *
+dma_alloc_writecombine(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp)
+{
+ return __dma_alloc(dev, size, handle, gfp, coherent_wc_area);
+}
+EXPORT_SYMBOL(dma_alloc_writecombine);
+
+static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_coherent_area *area)
+{
+ int ret = -ENXIO;
+#ifdef CONFIG_MMU
+ unsigned long user_size, kern_size;
+ struct arm_vmregion *c;
+
+ user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+ c = arm_vmregion_find(&area->vm, (unsigned long)cpu_addr);
+ if (c) {
+ unsigned long off = vma->vm_pgoff;
+
+ kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
+
+ if (off < kern_size &&
+ user_size <= (kern_size - off)) {
+ ret = remap_pfn_range(vma, vma->vm_start,
+ page_to_pfn(c->vm_pages) + off,
+ user_size << PAGE_SHIFT,
+ vma->vm_page_prot);
+ }
+ }
+#endif /* CONFIG_MMU */
+
+ return ret;
+}
+
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+ vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot);
+ return dma_mmap(dev, vma, cpu_addr, dma_addr, size, coherent_dma_area);
+}
+EXPORT_SYMBOL(dma_mmap_coherent);
+
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ return dma_mmap(dev, vma, cpu_addr, dma_addr, size, coherent_wc_area);
+}
+EXPORT_SYMBOL(dma_mmap_writecombine);
+
+/*
+ * free a page as defined by the above mapping.
+ * Must not be called with IRQs disabled.
+ */
+void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle)
+{
+ WARN_ON(irqs_disabled());
+
+ if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
+ return;
+
+ __dma_free(dev, size, cpu_addr, handle, coherent_dma_area);
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+void dma_free_writecombine(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle)
+{
+ WARN_ON(irqs_disabled());
+
+ __dma_free(dev, size, cpu_addr, handle, coherent_wc_area);
+}
+EXPORT_SYMBOL(dma_free_writecombine);
+
+#define is_coherent_pfn(x) ((((x) >= coherent_wc_head.pfn) && \
+ ((x) < coherent_wc_head.pfn_end)) || \
+ (((x) >= coherent_dma_head.pfn) && \
+ ((x) < coherent_dma_head.pfn_end)))
+/*
+ * Make an area consistent for devices.
+ * Note: Drivers should NOT use this function directly, as it will break
+ * platforms with CONFIG_DMABOUNCE.
+ * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
+ */
+void ___dma_single_cpu_to_dev(struct device *dev, const void *kaddr,
+ size_t size, enum dma_data_direction dir)
+{
+ unsigned long paddr;
+ unsigned long kaddr_pfn;
+
+ kaddr_pfn = dma_to_pfn(dev, virt_to_dma(dev, (void *)kaddr));
+
+ if (is_coherent_pfn(kaddr_pfn))
+ return;
+
+ BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));
+
+ dmac_map_area(kaddr, size, dir);
+
+ paddr = __pa(kaddr);
+ if (dir == DMA_FROM_DEVICE)
+ outer_inv_range(paddr, paddr + size);
+ else
+ outer_clean_range(paddr, paddr + size);
+
+ /* FIXME: non-speculating: flush on bidirectional mappings? */
+}
+EXPORT_SYMBOL(___dma_single_cpu_to_dev);
+
+void ___dma_single_dev_to_cpu(struct device *dev, const void *kaddr,
+ size_t size, enum dma_data_direction dir)
+{
+ unsigned long kaddr_pfn;
+
+ kaddr_pfn = dma_to_pfn(dev, virt_to_dma(dev, (void *)kaddr));
+
+ if (is_coherent_pfn(kaddr_pfn))
+ return;
+
+ BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));
+
+ /* FIXME: non-speculating: not required */
+ /* don't bother invalidating if DMA to device */
+ if (dir != DMA_TO_DEVICE) {
+ unsigned long paddr = __pa(kaddr);
+ outer_inv_range(paddr, paddr + size);
+ }
+
+ dmac_unmap_area(kaddr, size, dir);
+}
+EXPORT_SYMBOL(___dma_single_dev_to_cpu);
+
+static void dma_cache_maint_page(struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction dir,
+ void (*op)(const void *, size_t, int))
+{
+ /*
+ * A single sg entry may refer to multiple physically contiguous
+ * pages. But we still need to process highmem pages individually.
+ * If highmem is not configured then the bulk of this loop gets
+ * optimized out.
+ */
+ size_t left = size;
+ do {
+ size_t len = left;
+ void *vaddr;
+
+ if (PageHighMem(page)) {
+ if (len + offset > PAGE_SIZE) {
+ if (offset >= PAGE_SIZE) {
+ page += offset / PAGE_SIZE;
+ offset %= PAGE_SIZE;
+ }
+ len = PAGE_SIZE - offset;
+ }
+ vaddr = kmap_high_get(page);
+ if (vaddr) {
+ vaddr += offset;
+ op(vaddr, len, dir);
+ kunmap_high(page);
+ } else if (cache_is_vipt()) {
+ /* unmapped pages might still be cached */
+ vaddr = kmap_atomic(page);
+ op(vaddr + offset, len, dir);
+ kunmap_atomic(vaddr);
+ }
+ } else {
+ vaddr = page_address(page) + offset;
+ op(vaddr, len, dir);
+ }
+ offset = 0;
+ page++;
+ left -= len;
+ } while (left);
+}
+
+void ___dma_page_cpu_to_dev(struct page *page, unsigned long off,
+ size_t size, enum dma_data_direction dir)
+{
+ unsigned long paddr;
+
+ dma_cache_maint_page(page, off, size, dir, dmac_map_area);
+
+ paddr = page_to_phys(page) + off;
+ if (dir == DMA_FROM_DEVICE)
+ outer_inv_range(paddr, paddr + size);
+ else
+ outer_clean_range(paddr, paddr + size);
+ /* FIXME: non-speculating: flush on bidirectional mappings? */
+}
+EXPORT_SYMBOL(___dma_page_cpu_to_dev);
+
+void ___dma_page_dev_to_cpu(struct page *page, unsigned long off,
+ size_t size, enum dma_data_direction dir)
+{
+ unsigned long paddr = page_to_phys(page) + off;
+
+ /* FIXME: non-speculating: not required */
+ /* don't bother invalidating if DMA to device */
+ if (dir != DMA_TO_DEVICE)
+ outer_inv_range(paddr, paddr + size);
+
+ dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
+
+ /*
+ * Mark the D-cache clean for this page to avoid extra flushing.
+ */
+ if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
+ set_bit(PG_dcache_clean, &page->flags);
+}
+EXPORT_SYMBOL(___dma_page_dev_to_cpu);
+
+/**
+ * dma_map_sg - map a set of SG buffers for streaming mode DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ *
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the dma_map_single interface.
+ * Here the scatter gather list elements are each tagged with the
+ * appropriate dma address and length. They are obtained via
+ * sg_dma_{address,length}.
+ *
+ * Device ownership issues as mentioned for dma_map_single are the same
+ * here.
+ */
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir)
+{
+ struct scatterlist *s;
+ int i, j;
+
+ BUG_ON(!valid_dma_direction(dir));
+
+ for_each_sg(sg, s, nents, i) {
+ s->dma_address = __dma_map_page(dev, sg_page(s), s->offset,
+ s->length, dir);
+ if (dma_mapping_error(dev, s->dma_address))
+ goto bad_mapping;
+ }
+ debug_dma_map_sg(dev, sg, nents, nents, dir);
+ return nents;
+
+ bad_mapping:
+ for_each_sg(sg, s, i, j)
+ __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
+ return 0;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+/**
+ * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ *
+ * Unmap a set of streaming mode DMA translations. Again, CPU access
+ * rules concerning calls here are the same as for dma_unmap_single().
+ */
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir)
+{
+ struct scatterlist *s;
+ int i;
+
+ debug_dma_unmap_sg(dev, sg, nents, dir);
+
+ for_each_sg(sg, s, nents, i)
+ __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+/**
+ * dma_sync_sg_for_cpu
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0,
+ sg_dma_len(s), dir))
+ continue;
+
+ __dma_page_dev_to_cpu(sg_page(s), s->offset,
+ s->length, dir);
+ }
+
+ debug_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+}
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+/**
+ * dma_sync_sg_for_device
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0,
+ sg_dma_len(s), dir))
+ continue;
+
+ __dma_page_cpu_to_dev(sg_page(s), s->offset,
+ s->length, dir);
+ }
+
+ debug_dma_sync_sg_for_device(dev, sg, nents, dir);
+}
+EXPORT_SYMBOL(dma_sync_sg_for_device);
+
+/*
+ * Return whether the given device DMA address mask can be supported
+ * properly. For example, if your device can only drive the low 24-bits
+ * during bus mastering, then you would pass 0x00ffffff as the mask
+ * to this function.
+ */
+int dma_supported(struct device *dev, u64 mask)
+{
+ if (mask < (u64)arm_dma_limit)
+ return 0;
+ return 1;
+}
+EXPORT_SYMBOL(dma_supported);
+
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+ if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+ return -EIO;
+
+#ifndef CONFIG_DMABOUNCE
+ *dev->dma_mask = dma_mask;
+#endif
+
+ return 0;
+}
+EXPORT_SYMBOL(dma_set_mask);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES 4096
+
+static int __init dma_debug_do_init(void)
+{
+ dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+ return 0;
+}
+fs_initcall(dma_debug_do_init);
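On top of the allocator added above, a hedged sketch of typical driver usage: dma_alloc_writecombine() now draws from the dedicated WC pool, and dma_free_writecombine() is a real function rather than an alias for dma_free_coherent(). The structure and function names below are hypothetical:

#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/mm.h>

/* Hypothetical driver-owned buffer. */
struct demo_buf {
	struct device	*dev;
	void		*cpu;
	dma_addr_t	bus;
	size_t		size;
};

static int demo_buf_alloc(struct demo_buf *b, size_t size)
{
	b->size = PAGE_ALIGN(size);
	b->cpu = dma_alloc_writecombine(b->dev, b->size, &b->bus, GFP_KERNEL);
	return b->cpu ? 0 : -ENOMEM;
}

/* mmap() path: hands the same WC pages to userspace. */
static int demo_buf_mmap(struct demo_buf *b, struct vm_area_struct *vma)
{
	return dma_mmap_writecombine(b->dev, vma, b->cpu, b->bus, b->size);
}

static void demo_buf_free(struct demo_buf *b)
{
	dma_free_writecombine(b->dev, b->size, b->cpu, b->bus);
}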
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index f8037ba338ac..6f81c8e05c3a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -364,6 +364,9 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
#endif
arm_mm_memblock_reserve();
+#if defined(CONFIG_NON_ALIASED_COHERENT_MEM)
+ dma_coherent_reserve();
+#endif
arm_dt_memblock_reserve();
/* reserve any platform specific memblock areas */
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 010566799c80..967a8048f321 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -31,3 +31,8 @@ extern u32 arm_dma_limit;
void __init bootmem_init(void);
void arm_mm_memblock_reserve(void);
+
+#ifdef CONFIG_NON_ALIASED_COHERENT_MEM
+void dma_coherent_reserve(void);
+void dma_coherent_mapping(void);
+#endif
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4fa9c246ae93..bb80555edac9 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -273,6 +273,18 @@ static struct mem_type mem_types[] = {
.prot_l1 = PMD_TYPE_TABLE,
.domain = DOMAIN_KERNEL,
},
+#ifdef CONFIG_NON_ALIASED_COHERENT_MEM
+ [MT_DMA_COHERENT] = {
+ .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE |
+ PMD_SECT_S,
+ .domain = DOMAIN_IO,
+ },
+ [MT_WC_COHERENT] = {
+ .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE |
+ PMD_SECT_S,
+ .domain = DOMAIN_IO,
+ },
+#endif
};
const struct mem_type *get_mem_type(unsigned int type)
@@ -353,6 +365,9 @@ static void __init build_mem_type_table(void)
mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
+#ifdef CONFIG_NON_ALIASED_COHERENT_MEM
+ mem_types[MT_DMA_COHERENT].prot_sect |= PMD_SECT_XN;
+#endif
}
if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
/*
@@ -457,13 +472,30 @@ static void __init build_mem_type_table(void)
/* Non-cacheable Normal is XCB = 001 */
mem_types[MT_MEMORY_NONCACHED].prot_sect |=
PMD_SECT_BUFFERED;
+#ifdef CONFIG_NON_ALIASED_COHERENT_MEM
+ mem_types[MT_WC_COHERENT].prot_sect |=
+ PMD_SECT_BUFFERED;
+ mem_types[MT_DMA_COHERENT].prot_sect |=
+ PMD_SECT_BUFFERED;
+#endif
} else {
/* For both ARMv6 and non-TEX-remapping ARMv7 */
mem_types[MT_MEMORY_NONCACHED].prot_sect |=
PMD_SECT_TEX(1);
+#ifdef CONFIG_NON_ALIASED_COHERENT_MEM
+ mem_types[MT_WC_COHERENT].prot_sect |=
+ PMD_SECT_TEX(1);
+#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
+ mem_types[MT_DMA_COHERENT].prot_sect |=
+ PMD_SECT_TEX(1);
+#endif
+#endif
}
} else {
mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+#ifdef CONFIG_NON_ALIASED_COHERENT_MEM
+ mem_types[MT_WC_COHERENT].prot_sect |= PMD_SECT_BUFFERED;
+#endif
}
for (i = 0; i < 16; i++) {
@@ -986,6 +1018,10 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
create_mapping(&map);
}
+#ifdef CONFIG_NON_ALIASED_COHERENT_MEM
+ dma_coherent_mapping();
+#endif
+
/*
* Ask the machine support to map in the statically mapped devices.
*/