From 94581094e774402a11887719bac10505236c2d51 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:39 +0200 Subject: x86: add alloc_coherent dma_ops callback to GART driver [ v2 - x86: make gart_alloc_coherent return zeroed memory FUJITA Tomonori pointed it out that the dma_alloc_coherent function should return memory set to zero. This patch adds this to the GART implementation too. ] Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8fd4d5..076e64b2d4f3 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -499,6 +499,26 @@ error: return 0; } +/* allocate and map a coherent mapping */ +static void * +gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, + gfp_t flag) +{ + void *vaddr; + + vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); + if (!vaddr) + return NULL; + + *dma_addr = gart_map_single(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL); + if (*dma_addr != bad_dma_address) + return vaddr; + + free_pages((unsigned long)vaddr, get_order(size)); + + return NULL; +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -701,6 +721,7 @@ static struct dma_mapping_ops gart_dma_ops = { .sync_sg_for_device = NULL, .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, + .alloc_coherent = gart_alloc_coherent, }; void gart_iommu_shutdown(void) -- cgit v1.2.3 From 43a5a5a09b8cfd56047706cf904718d073ccfd33 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:40 +0200 Subject: x86: add free_coherent dma_ops callback to GART driver Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 076e64b2d4f3..ef753e233580 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -519,6 +519,15 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, return NULL; } +/* free a coherent mapping */ +static void +gart_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr) +{ + gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); + free_pages((unsigned long)vaddr, get_order(size)); +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -722,6 +731,7 @@ static struct dma_mapping_ops gart_dma_ops = { .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .alloc_coherent = gart_alloc_coherent, + .free_coherent = gart_free_coherent, }; void gart_iommu_shutdown(void) -- cgit v1.2.3 From 6c505ce3930c6a6b455cda53fab3e88ae44f8221 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:45 +0200 Subject: x86: move dma_*_coherent functions to include file All the x86 DMA-API functions are defined in asm/dma-mapping.h. This patch moves the dma_*_coherent functions also to this header file because they are now small enough to do so. This is done as a separate patch because it also includes some renaming and restructuring of the dma-mapping.h file. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index ef753e233580..e81df25dc067 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -276,7 +276,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) unsigned long bus; if (!dev) - dev = &fallback_dev; + dev = &x86_dma_fallback_dev; if (!need_iommu(dev, paddr, size)) return paddr; @@ -427,7 +427,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) return 0; if (!dev) - dev = &fallback_dev; + dev = &x86_dma_fallback_dev; out = 0; start = 0; -- cgit v1.2.3 From 766af9fa812f49feb4a3e62cf92f3d37f33c7fb6 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Aug 2008 00:12:09 +0900 Subject: dma-mapping.h, x86: remove last user of dma_mapping_ops->map_simple pci-dma.c doesn't use map_simple hook any more so we can remove it from struct dma_mapping_ops now. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index e81df25dc067..cfd7ec25e37e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -720,7 +720,6 @@ extern int agp_amd64_init(void); static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, - .map_simple = gart_map_simple, .unmap_single = gart_unmap_single, .sync_single_for_cpu = NULL, .sync_single_for_device = NULL, -- cgit v1.2.3 From 421076e2bec1b917bdcf83da35b24f6349bf35db Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Aug 2008 16:29:10 +0900 Subject: x86: dma_*_coherent rework patchset v2, fix alloc_coherent dma_ops callback was added to GART, however, it doesn't return a size aligned address wrt dma_alloc_coherent, as DMA-mapping.txt defines. This patch fixes it. This patch also removes unused gart_map_simple (dma_mapping_ops->map_simple has gone). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 338c4f241559..4d0864900b8c 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -261,20 +261,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); } -static dma_addr_t -gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) -{ - dma_addr_t map; - unsigned long align_mask; - - align_mask = (1UL << get_order(size)) - 1; - map = dma_map_area(dev, paddr, size, dir, align_mask); - - flush_gart(); - - return map; -} - /* Map a single area into the IOMMU */ static dma_addr_t gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) @@ -512,12 +498,21 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { void *vaddr; + unsigned long align_mask; vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); if (!vaddr) return NULL; - *dma_addr = gart_map_single(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL); + align_mask = (1UL << get_order(size)) - 1; + + if (!dev) + dev = &x86_dma_fallback_dev; + + *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, + align_mask); + flush_gart(); + if (*dma_addr != bad_dma_address) return vaddr; -- cgit v1.2.3 From 551b4545bf9658dc5ae5a1277dcd4d7bf0028b28 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 5 Sep 2008 17:58:49 +0900 Subject: x86: gart alloc_coherent doesn't need to check NULL device argument asm/dma-mapping.h guarantees that gart alloc_coherent doesn't get NULL device argument. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 4d0864900b8c..0b99d4a06f74 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -506,9 +506,6 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; - if (!dev) - dev = &x86_dma_fallback_dev; - *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, align_mask); flush_gart(); -- cgit v1.2.3 From ac4ff656c07ada78316307b0c0ce8a8eb48aa6dd Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 01:06:47 +0900 Subject: x86: convert gart to use is_buffer_dma_capable helper function Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 0b99d4a06f74..1b0c412566e5 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -214,24 +214,14 @@ static void iommu_full(struct device *dev, size_t size, int dir) static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) { - u64 mask = *dev->dma_mask; - int high = addr + size > mask; - int mmu = high; - - if (force_iommu) - mmu = 1; - - return mmu; + return force_iommu || + !is_buffer_dma_capable(*dev->dma_mask, addr, size); } static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) { - u64 mask = *dev->dma_mask; - int high = addr + size > mask; - int mmu = high; - - return mmu; + return !is_buffer_dma_capable(*dev->dma_mask, addr, size); } /* Map a single continuous physical area into the IOMMU. -- cgit v1.2.3 From bee44f294efd8417f5e68553778a6cc957af1547 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 12 Sep 2008 19:42:35 +0900 Subject: x86: make GART to respect device's dma_mask about virtual mappings Currently, GART IOMMU ingores device's dma_mask when it does virtual mappings. So it could give a device a virtual address that the device can't access to. This patch fixes the above problem. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 1b0c412566e5..9972c42ac925 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -83,23 +83,34 @@ static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, - unsigned long align_mask) + unsigned long align_mask, u64 dma_mask) { unsigned long offset, flags; unsigned long boundary_size; unsigned long base_index; + unsigned long limit; base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; + limit = iommu_device_max_index(iommu_pages, + DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE), + dma_mask >> PAGE_SHIFT); + spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, + + if (limit <= next_bit) { + need_flush = 1; + next_bit = 0; + } + + offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit, size, base_index, boundary_size, align_mask); - if (offset == -1) { + if (offset == -1 && next_bit) { need_flush = 1; - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, + offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0, size, base_index, boundary_size, align_mask); } @@ -228,12 +239,14 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir, unsigned long align_mask) + size_t size, int dir, unsigned long align_mask, + u64 dma_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); + unsigned long iommu_page; int i; + iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask); if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) return phys_mem; @@ -263,7 +276,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = dma_map_area(dev, paddr, size, dir, 0); + bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev)); flush_gart(); return bus; @@ -314,6 +327,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, { struct scatterlist *s; int i; + u64 dma_mask = dma_get_mask(dev); #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); @@ -323,7 +337,8 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0); + addr = dma_map_area(dev, addr, s->length, dir, 0, + dma_mask); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -345,14 +360,16 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start = alloc_iommu(dev, pages, 0); - unsigned long iommu_page = iommu_start; + unsigned long iommu_start; + unsigned long iommu_page; struct scatterlist *s; int i; + iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev)); if (iommu_start == -1) return -1; + iommu_page = iommu_start; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; @@ -497,7 +514,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, - align_mask); + align_mask, dma_mask); flush_gart(); if (*dma_addr != bad_dma_address) -- cgit v1.2.3 From f6a32a36ab96016675cd414802904feb288d7899 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 11 Sep 2008 23:08:48 +0900 Subject: x86: gart alloc_coherent does virtual mapppings only when necessary gart alloc_coherent need to do virtual mapppings only when an allocated buffer is not DMA-capable for a device. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9972c42ac925..9739d5682093 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -505,15 +505,23 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { void *vaddr; + dma_addr_t paddr; unsigned long align_mask; + u64 dma_mask = dma_alloc_coherent_mask(dev, flag); vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); if (!vaddr) return NULL; + paddr = virt_to_phys(vaddr); + if (is_buffer_dma_capable(dma_mask, paddr, size)) { + *dma_addr = paddr; + return vaddr; + } + align_mask = (1UL << get_order(size)) - 1; - *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, + *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, align_mask, dma_mask); flush_gart(); -- cgit v1.2.3 From 2842e5bf3115193f05dc9dac20f940e7abf44c1a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Sep 2008 15:23:43 +0200 Subject: x86: move GART TLB flushing options to generic code The GART currently implements the iommu=[no]fullflush command line parameters which influence its IO/TLB flushing strategy. This patch makes these parameters generic so that they can be used by the AMD IOMMU too. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9739d5682093..508ef470b27f 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -45,15 +45,6 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). - */ -int iommu_fullflush = 1; - /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); /* Guarded by iommu_bitmap_lock: */ @@ -901,10 +892,6 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush", 8)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; if (!strncmp(p, "noagp", 5)) no_agp = 1; if (!strncmp(p, "noaperture", 10)) -- cgit v1.2.3 From d26dbc5cf94b0a28acc947285c3b54814a73cb2e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 22 Sep 2008 22:35:07 +0900 Subject: iommu: export iommu_area_reserve helper function x86 has set_bit_string() that does the exact same thing that set_bit_area() in lib/iommu-helper.c does. This patch exports set_bit_area() in lib/iommu-helper.c as iommu_area_reserve(), converts GART, Calgary, and AMD IOMMU to use it. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 508ef470b27f..3dcb1ad86e38 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -827,7 +827,7 @@ void __init gart_iommu_init(void) * Out of IOMMU space handling. * Reserve some invalid pages at the beginning of the GART. */ - set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); + iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); agp_memory_reserved = iommu_size; printk(KERN_INFO -- cgit v1.2.3 From afa9fdc2f5f8e4d98f3e77bfa204412cbc181346 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 20 Sep 2008 01:23:30 +0900 Subject: iommu: remove fullflush and nofullflush in IOMMU generic option This patch against tip/x86/iommu virtually reverts 2842e5bf3115193f05dc9dac20f940e7abf44c1a. But just reverting the commit breaks AMD IOMMU so this patch also includes some fixes. The above commit adds new two options to x86 IOMMU generic kernel boot options, fullflush and nofullflush. But such change that affects all the IOMMUs needs more discussion (all IOMMU parties need the chance to discuss it): http://lkml.org/lkml/2008/9/19/106 Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 3dcb1ad86e38..9e390f1bd46a 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -45,6 +45,15 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ +int iommu_fullflush = 1; + /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); /* Guarded by iommu_bitmap_lock: */ @@ -892,6 +901,10 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; + if (!strncmp(p, "fullflush", 8)) + iommu_fullflush = 1; + if (!strncmp(p, "nofullflush", 11)) + iommu_fullflush = 0; if (!strncmp(p, "noagp", 5)) no_agp = 1; if (!strncmp(p, "noaperture", 10)) -- cgit v1.2.3 From ecef533ea68b2fb3baaf459beb2f802a240bdb16 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:36 +0900 Subject: revert "x86: make GART to respect device's dma_mask about virtual mappings" This reverts: commit bee44f294efd8417f5e68553778a6cc957af1547 Author: FUJITA Tomonori Date: Fri Sep 12 19:42:35 2008 +0900 x86: make GART to respect device's dma_mask about virtual mappings I wrote the above commit to fix a GART alloc_coherent regression, that can't handle a device having dma_masks > 24bit < 32bits, introduced by the alloc_coherent rewrite: http://lkml.org/lkml/2008/8/12/200 After the alloc_coherent rewrite, GART alloc_coherent tried to allocate pages with GFP_DMA32. If GART got an address that a device can't access to, GART mapped the address to a virtual I/O address. But GART mapping mechanism didn't take account of dma mask, so GART could use a virtual I/O address that the device can't access to again. Alan pointed out: " This is indeed a specific problem found with things like older AACRAID where control blocks must be below 31bits and the GART is above 0x80000000. " The above commit modified GART mapping mechanism to take care of dma mask. But Andi pointed out, "The GART is somewhere in the 4GB range so you cannot use it to map anything < 4GB. Also GART is pretty small." http://lkml.org/lkml/2008/9/12/43 That means it's possible that GART doesn't have virtual I/O address space that a device can access to. The above commit (to modify GART mapping mechanism to take care of dma mask) can't fix the regression reliably so let's avoid making GART more complicated. We need a solution that always works for dma_masks > 24bit < 32bits. That's how GART worked before the alloc_coherent rewrite. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Acked-by: Alan Cox Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9e390f1bd46a..7e08e466b8ad 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -83,34 +83,23 @@ static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, - unsigned long align_mask, u64 dma_mask) + unsigned long align_mask) { unsigned long offset, flags; unsigned long boundary_size; unsigned long base_index; - unsigned long limit; base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; - limit = iommu_device_max_index(iommu_pages, - DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE), - dma_mask >> PAGE_SHIFT); - spin_lock_irqsave(&iommu_bitmap_lock, flags); - - if (limit <= next_bit) { - need_flush = 1; - next_bit = 0; - } - - offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit, + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, size, base_index, boundary_size, align_mask); - if (offset == -1 && next_bit) { + if (offset == -1) { need_flush = 1; - offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0, + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, size, base_index, boundary_size, align_mask); } @@ -239,14 +228,12 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir, unsigned long align_mask, - u64 dma_mask) + size_t size, int dir, unsigned long align_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page; + unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; - iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask); if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) return phys_mem; @@ -276,7 +263,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev)); + bus = dma_map_area(dev, paddr, size, dir, 0); flush_gart(); return bus; @@ -327,7 +314,6 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, { struct scatterlist *s; int i; - u64 dma_mask = dma_get_mask(dev); #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); @@ -337,8 +323,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0, - dma_mask); + addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -360,16 +345,14 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start; - unsigned long iommu_page; + unsigned long iommu_start = alloc_iommu(dev, pages, 0); + unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; - iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev)); if (iommu_start == -1) return -1; - iommu_page = iommu_start; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; @@ -522,7 +505,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, - align_mask, dma_mask); + align_mask); flush_gart(); if (*dma_addr != bad_dma_address) -- cgit v1.2.3 From 1d990882153f36723f9e8717c4401689e64c7a36 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:37 +0900 Subject: x86: restore old GART alloc_coherent behavior Currently, GART alloc_coherent tries to allocate pages with GFP_DMA32 for a device having dma_masks > 24bit < 32bits. If GART gets an address that a device can't access to, GART try to map the address to a virtual I/O address that the device can access to. But Andi pointed out, "The GART is somewhere in the 4GB range so you cannot use it to map anything < 4GB. Also GART is pretty small." http://lkml.org/lkml/2008/9/12/43 That is, it's possible that GART doesn't have virtual I/O address space that a device can access to. The above behavior doesn't work for a device having dma_masks > 24bit < 32bits. This patch restores old GART alloc_coherent behavior (before the alloc_coherent rewrite). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 7e08e466b8ad..25c94fb96d74 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -487,31 +487,28 @@ static void * gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { - void *vaddr; dma_addr_t paddr; unsigned long align_mask; - u64 dma_mask = dma_alloc_coherent_mask(dev, flag); - - vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); - if (!vaddr) - return NULL; - - paddr = virt_to_phys(vaddr); - if (is_buffer_dma_capable(dma_mask, paddr, size)) { - *dma_addr = paddr; - return vaddr; - } - - align_mask = (1UL << get_order(size)) - 1; - - *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, - align_mask); - flush_gart(); - - if (*dma_addr != bad_dma_address) - return vaddr; - - free_pages((unsigned long)vaddr, get_order(size)); + struct page *page; + + if (force_iommu && !(flag & GFP_DMA)) { + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + page = alloc_pages(flag | __GFP_ZERO, get_order(size)); + if (!page) + return NULL; + + align_mask = (1UL << get_order(size)) - 1; + paddr = dma_map_area(dev, page_to_phys(page), size, + DMA_BIDIRECTIONAL, align_mask); + + flush_gart(); + if (paddr != bad_dma_address) { + *dma_addr = paddr; + return page_address(page); + } + __free_pages(page, get_order(size)); + } else + return dma_generic_alloc_coherent(dev, size, dma_addr, flag); return NULL; } -- cgit v1.2.3 From 1615965e54eb94d7bcd298d2163739bd79f602d4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 22:41:10 +0900 Subject: x86 gart: remove unnecessary initialization There is no point to have such initialization in struct dma_mapping_ops. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 25c94fb96d74..b85a2f9bb340 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -715,12 +715,6 @@ extern int agp_amd64_init(void); static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .unmap_single = gart_unmap_single, - .sync_single_for_cpu = NULL, - .sync_single_for_device = NULL, - .sync_single_range_for_cpu = NULL, - .sync_single_range_for_device = NULL, - .sync_sg_for_cpu = NULL, - .sync_sg_for_device = NULL, .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .alloc_coherent = gart_alloc_coherent, -- cgit v1.2.3 From 237a62247c2879331986a300d6ab36ad21264c68 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:13:53 +0200 Subject: x86/iommu: make GART driver checkpatch clean Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index b85a2f9bb340..aa569db73a2a 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -27,8 +27,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -175,7 +175,8 @@ static void dump_leak(void) iommu_leak_pages); for (i = 0; i < iommu_leak_pages; i += 2) { printk(KERN_DEBUG "%lu: ", iommu_pages-i); - printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0); + printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], + 0); printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); } printk(KERN_DEBUG "\n"); @@ -688,7 +689,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) if (!error) error = sysdev_register(&device_gart); if (error) - panic("Could not register gart_sysdev -- would corrupt data on next suspend"); + panic("Could not register gart_sysdev -- " + "would corrupt data on next suspend"); flush_gart(); @@ -710,8 +712,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) return -1; } -extern int agp_amd64_init(void); - static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .unmap_single = gart_unmap_single, @@ -777,8 +777,8 @@ void __init gart_iommu_init(void) (no_agp && init_k8_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { printk(KERN_WARNING "More than 4GB of memory " - "but GART IOMMU not available.\n" - KERN_WARNING "falling back to iommu=soft.\n"); + "but GART IOMMU not available.\n"); + printk(KERN_WARNING "falling back to iommu=soft.\n"); } return; } @@ -868,7 +868,8 @@ void __init gart_parse_options(char *p) if (!strncmp(p, "leak", 4)) { leak_trace = 1; p += 4; - if (*p == '=') ++p; + if (*p == '=') + ++p; if (isdigit(*p) && get_option(&p, &arg)) iommu_leak_pages = arg; } -- cgit v1.2.3 From 3610f2116e961cdcbd3546a3828470f7aa636212 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:13:54 +0200 Subject: x86/iommu: convert GART need_flush to bool Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index aa569db73a2a..aecea068f583 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -80,7 +80,7 @@ AGPEXTERN int agp_memory_reserved; AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static int need_flush; /* global flush state. set for each gart wrap */ +static bool need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, unsigned long align_mask) @@ -98,7 +98,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, size, base_index, boundary_size, align_mask); if (offset == -1) { - need_flush = 1; + need_flush = true; offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, size, base_index, boundary_size, align_mask); @@ -107,11 +107,11 @@ static unsigned long alloc_iommu(struct device *dev, int size, next_bit = offset+size; if (next_bit >= iommu_pages) { next_bit = 0; - need_flush = 1; + need_flush = true; } } if (iommu_fullflush) - need_flush = 1; + need_flush = true; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); return offset; @@ -136,7 +136,7 @@ static void flush_gart(void) spin_lock_irqsave(&iommu_bitmap_lock, flags); if (need_flush) { k8_flush_garts(); - need_flush = 0; + need_flush = false; } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } -- cgit v1.2.3 From 0114267be1bebc2e9c913b19579900153583d617 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:42:12 +0200 Subject: x86/iommu: use __GFP_ZERO instead of memset for GART Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/pci-gart_64.c') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index aecea068f583..d077116fec1b 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -674,13 +674,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) info->aper_size = aper_size >> 20; gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); - gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); + gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(gatt_size)); if (!gatt) panic("Cannot allocate GATT table"); if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) panic("Could not set GART PTEs to uncacheable pages"); - memset(gatt, 0, gatt_size); agp_gatt_table = gatt; enable_gart_translations(); @@ -788,19 +788,16 @@ void __init gart_iommu_init(void) iommu_size = check_iommu_size(info.aper_base, aper_size); iommu_pages = iommu_size >> PAGE_SHIFT; - iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, + iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(iommu_pages/8)); if (!iommu_gart_bitmap) panic("Cannot allocate iommu bitmap\n"); - memset(iommu_gart_bitmap, 0, iommu_pages/8); #ifdef CONFIG_IOMMU_LEAK if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, + iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, get_order(iommu_pages*sizeof(void *))); - if (iommu_leak_tab) - memset(iommu_leak_tab, 0, iommu_pages * 8); - else + if (!iommu_leak_tab) printk(KERN_DEBUG "PCI-DMA: Cannot allocate leak trace area\n"); } -- cgit v1.2.3