author    Gary King <GKing@nvidia.com>    2010-02-03 20:49:02 -0800
committer Gary King <gking@nvidia.com>    2010-03-02 17:22:49 -0800
commit    9e6f4b60adfec9c1d627aed742c31b46615dec92 (patch)
tree      3c166c8c7c14ba51356b5dcdcd4dbc99c15dd485 /arch
parent    e0fba00523aeacf1372351f8bc9c5338dc5a8a1e (diff)
tegra: define I/O virtual memory manager interface
Tegra SoCs include a mechanism to remap discontiguous system memory into a contiguous region in the addressable virtual address space of DMA devices (GPU, APB and AHB peripherals, etc.). On Tegra and Tegra 2 devices, this mechanism is called the GART.

The IOVMM manager provides an abstraction for client drivers and OS subsystems to access this functionality using concepts familiar from the operating system's virtual memory system: reserving and decommitting virtual address regions, adding virtual-to-physical translations into reserved regions, and context-switching between multiple address spaces. IOVMM provides a driver HAL to allow for future enhancements to the hardware (additional address spaces, larger translation regions, demand-loading of translations) while keeping the exposed client API stable.

IOVMM uses a best-fit allocator implemented as a double red-black tree: one tree of all blocks, ordered by address, to facilitate efficient merging of free blocks, and one tree of free blocks, ordered by size, to facilitate efficient allocation.

Add kernel configurations for the supported IOVMM devices (currently the GART, found on Tegra and Tegra 2 SoCs) and a top-level IOVMM kernel config which is selected automatically whenever a device is enabled. Enable IOVMM by default for Harmony.

Change-Id: Ic3c85d45654300a09bc7f1f824b32824ec956ea6
Reviewed-on: http://git-master/r/398
Reviewed-by: Gary King <gking@nvidia.com>
Tested-by: Gary King <gking@nvidia.com>
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/configs/tegra_harmony_android_defconfig |   2
-rw-r--r--  arch/arm/mach-tegra/Kconfig                      |  14
-rw-r--r--  arch/arm/mach-tegra/Makefile                     |   4
-rw-r--r--  arch/arm/mach-tegra/include/mach/iovmm.h         | 187
-rw-r--r--  arch/arm/mach-tegra/iovmm-gart.c                 | 370
-rw-r--r--  arch/arm/mach-tegra/iovmm.c                      | 712
-rw-r--r--  arch/arm/mach-tegra/iovmm.txt                    | 202
7 files changed, 1491 insertions, 0 deletions
diff --git a/arch/arm/configs/tegra_harmony_android_defconfig b/arch/arm/configs/tegra_harmony_android_defconfig
index 03dc2bbfe6e7..df33d945a67f 100644
--- a/arch/arm/configs/tegra_harmony_android_defconfig
+++ b/arch/arm/configs/tegra_harmony_android_defconfig
@@ -192,6 +192,8 @@ CONFIG_ARCH_TEGRA=y
CONFIG_ARCH_TEGRA_2x_SOC=y
CONFIG_MACH_TEGRA_GENERIC=y
CONFIG_TEGRA_SYSTEM_DMA=y
+CONFIG_TEGRA_IOVMM_GART=y
+CONFIG_TEGRA_IOVMM=y
# CONFIG_MACH_TEGRA_GENERIC_DEBUG is not set
CONFIG_TEGRA_ODM_RFKILL=y
CONFIG_TEGRA_NVEC=y
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 85154422c9cf..584ca22af791 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -42,6 +42,20 @@ config TEGRA_SYSTEM_DMA
Adds system DMA functionality for NVIDIA Tegra SoCs, used by
several Tegra device drivers
+config TEGRA_IOVMM_GART
+ bool "Enable I/O virtual memory manager for GART"
+ depends on ARCH_TEGRA_1x_SOC || ARCH_TEGRA_2x_SOC
+ default y
+ select TEGRA_IOVMM
+ help
+ Enables support for remapping discontiguous physical memory
+ shared with the operating system into contiguous I/O virtual
+ space through the GART hardware included on Tegra and
+ Tegra 2 SoCs
+
+config TEGRA_IOVMM
+ bool
+
config MACH_TEGRA_GENERIC_DEBUG
bool "Enable debug logging for Tegra generic drivers"
depends on MACH_TEGRA_GENERIC
diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile
index 534270fba050..804825fa89c4 100644
--- a/arch/arm/mach-tegra/Makefile
+++ b/arch/arm/mach-tegra/Makefile
@@ -16,6 +16,10 @@ obj-y += irq_gpio.o
obj-y += timer.o
obj-y += tegra_sysmap.o
+# IOVMM support
+obj-$(CONFIG_TEGRA_IOVMM_GART) += iovmm-gart.o
+obj-$(CONFIG_TEGRA_IOVMM) += iovmm.o
+
# Tegra suspend operation
obj-$(CONFIG_PM) += suspend_ops.o
diff --git a/arch/arm/mach-tegra/include/mach/iovmm.h b/arch/arm/mach-tegra/include/mach/iovmm.h
new file mode 100644
index 000000000000..1549e5b628fe
--- /dev/null
+++ b/arch/arm/mach-tegra/include/mach/iovmm.h
@@ -0,0 +1,187 @@
+/*
+ * arch/arm/mach-tegra/include/mach/iovmm.h
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#if defined(CONFIG_ARCH_TEGRA_1x_SOC) || defined(CONFIG_ARCH_TEGRA_2x_SOC)
+typedef u32 tegra_iovmm_addr_t;
+#else
+#error "Unsupported tegra architecture family"
+#endif
+
+struct tegra_iovmm_device_ops;
+
+/* each I/O virtual memory manager unit should register a device with
+ * the iovmm system
+ */
+struct tegra_iovmm_device {
+ struct tegra_iovmm_device_ops *ops;
+ const char *name;
+ struct list_head list;
+ int pgsize_bits;
+};
+
+/* tegra_iovmm_domain serves a purpose analogous to mm_struct as defined in
+ * <linux/mm_types.h> - it defines a virtual address space within which
+ * tegra_iovmm_areas can be created.
+ */
+struct tegra_iovmm_domain {
+ atomic_t clients;
+ atomic_t locks;
+ spinlock_t block_lock;
+ unsigned long flags;
+ wait_queue_head_t delay_lock; /* when lock_client fails */
+ struct rw_semaphore map_lock;
+ struct rb_root all_blocks; /* ordered by address */
+ struct rb_root free_blocks; /* ordered by size */
+ struct tegra_iovmm_device *dev;
+};
+
+/* tegra_iovmm_client is analogous to an individual task in the task group
+ * which owns an mm_struct.
+ */
+
+struct iovmm_share_group;
+
+struct tegra_iovmm_client {
+ const char *name;
+ unsigned long flags;
+ struct iovmm_share_group *group;
+ struct tegra_iovmm_domain *domain;
+ struct list_head list;
+};
+
+/* tegra_iovmm_area serves a purpose analogous to vm_area_struct as defined
+ * in <linux/mm_types.h> - it defines a virtual memory area which can be
+ * mapped to physical memory by a client-provided mapping function. */
+
+struct tegra_iovmm_area {
+ struct tegra_iovmm_domain *domain;
+ tegra_iovmm_addr_t iovm_start;
+ tegra_iovmm_addr_t iovm_length;
+ pgprot_t pgprot;
+ struct tegra_iovmm_area_ops *ops;
+};
+
+struct tegra_iovmm_device_ops {
+ /* maps a VMA using the page residency functions provided by the VMA */
+ int (*map)(struct tegra_iovmm_device *dev,
+ struct tegra_iovmm_area *io_vma);
+ /* marks all PTEs in a VMA as invalid; decommits the virtual address
+ * space (potentially freeing PDEs when decommit is true.) */
+ void (*unmap)(struct tegra_iovmm_device *dev,
+ struct tegra_iovmm_area *io_vma, bool decommit);
+ void (*map_pfn)(struct tegra_iovmm_device *dev,
+ struct tegra_iovmm_area *io_vma,
+ tegra_iovmm_addr_t offs, unsigned long pfn);
+ /* ensures that a domain is resident in the hardware's mapping region
+ * so that it may be used by a client */
+ int (*lock_domain)(struct tegra_iovmm_device *dev,
+ struct tegra_iovmm_domain *domain);
+ void (*unlock_domain)(struct tegra_iovmm_device *dev,
+ struct tegra_iovmm_domain *domain);
+ /* allocates a vmm_domain for the specified client; may return the same
+ * domain for multiple clients */
+ struct tegra_iovmm_domain* (*alloc_domain)(
+ struct tegra_iovmm_device *dev,
+ struct tegra_iovmm_client *client);
+ void (*free_domain)(struct tegra_iovmm_device *dev,
+ struct tegra_iovmm_domain *domain);
+};
+
+struct tegra_iovmm_area_ops {
+ /* ensures that the page of data starting at the specified offset
+ * from the start of the iovma is resident and pinned for use by
+ * DMA, returns the system pfn, or an invalid pfn if the
+ * operation fails. */
+ unsigned long (*lock_makeresident)(struct tegra_iovmm_area *area,
+ tegra_iovmm_addr_t offs);
+ /* called when the page is unmapped from the I/O VMA */
+ void (*release)(struct tegra_iovmm_area *area, tegra_iovmm_addr_t offs);
+};
+
+/* called by clients to allocate an I/O VMM client mapping context which
+ * will be shared by all clients in the same share_group */
+struct tegra_iovmm_client *tegra_iovmm_alloc_client(const char *name,
+ const char *share_group);
+
+size_t tegra_iovmm_get_vm_size(struct tegra_iovmm_client *client);
+
+void tegra_iovmm_free_client(struct tegra_iovmm_client *client);
+
+/* called by clients to ensure that their mapping context is resident
+ * before performing any DMA operations addressing I/O VMM regions.
+ * client_lock may return -EINTR. */
+int tegra_iovmm_client_lock(struct tegra_iovmm_client *client);
+int tegra_iovmm_client_trylock(struct tegra_iovmm_client *client);
+
+/* called by clients after DMA operations are complete */
+void tegra_iovmm_client_unlock(struct tegra_iovmm_client *client);
+
+/* called by clients to allocate a new iovmm_area and reserve I/O virtual
+ * address space for it. if ops is NULL, clients should subsequently call
+ * tegra_iovmm_vm_map_pages and/or tegra_iovmm_vm_insert_pfn to explicitly
+ * map the I/O virtual address to an OS-allocated page or physical address,
+ * respectively. VM operations may be called before this call returns */
+struct tegra_iovmm_area *tegra_iovmm_create_vm(
+ struct tegra_iovmm_client *client, struct tegra_iovmm_area_ops *ops,
+ unsigned long size, pgprot_t pgprot);
+
+/* called by clients to "zap" an iovmm_area, and replace all mappings
+ * in it with invalid ones, without freeing the virtual address range */
+void tegra_iovmm_zap_vm(struct tegra_iovmm_area *vm);
+
+/* after zapping a demand-loaded iovmm_area, the client should unzap it
+ * to allow the VMM device to remap the page range. */
+void tegra_iovmm_unzap_vm(struct tegra_iovmm_area *vm);
+
+/* called by clients to return an iovmm_area to the free pool for the domain */
+void tegra_iovmm_free_vm(struct tegra_iovmm_area *vm);
+
+/* called by client software to map the page-aligned I/O address vaddr to
+ * a specific physical address pfn. I/O VMA should have been created with
+ * a NULL tegra_iovmm_area_ops structure. */
+void tegra_iovmm_vm_insert_pfn(struct tegra_iovmm_area *area,
+ tegra_iovmm_addr_t vaddr, unsigned long pfn);
+
+/* called by clients to return the iovmm_area containing addr, or NULL if
+ * addr has not been allocated. the caller should call tegra_iovmm_area_put when
+ * finished using the returned pointer */
+struct tegra_iovmm_area *tegra_iovmm_find_area_get(
+ struct tegra_iovmm_client *client, tegra_iovmm_addr_t addr);
+
+struct tegra_iovmm_area *tegra_iovmm_area_get(struct tegra_iovmm_area *vm);
+void tegra_iovmm_area_put(struct tegra_iovmm_area *vm);
+
+/* called by drivers to initialize a tegra_iovmm_domain structure */
+int tegra_iovmm_domain_init(struct tegra_iovmm_domain *domain,
+ struct tegra_iovmm_device *dev, tegra_iovmm_addr_t start,
+ tegra_iovmm_addr_t end);
+
+/* called by drivers to register an I/O VMM device with the system */
+int tegra_iovmm_register(struct tegra_iovmm_device *dev);
+
+/* called by drivers to remove an I/O VMM device from the system */
+int tegra_iovmm_unregister(struct tegra_iovmm_device *dev);
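As a usage sketch (not part of this patch), a hypothetical client of the header above might reserve an I/O virtual region and back it with explicitly allocated pages through the explicit-pfn path; the function name and page count below are illustrative assumptions only:

    #include <linux/gfp.h>
    #include <linux/mm.h>
    #include "mach/iovmm.h"

    /* illustrative only: back a 16-page I/O virtual region with
     * individually allocated pages using the explicit-pfn path
     * (ops == NULL, so tegra_iovmm_vm_insert_pfn is permitted). */
    #define EXAMPLE_PAGES 16

    static int example_map_pages(struct page **pages)
    {
            struct tegra_iovmm_client *client;
            struct tegra_iovmm_area *area;
            tegra_iovmm_addr_t va;
            unsigned int i;

            client = tegra_iovmm_alloc_client("example", NULL);
            if (!client)
                    return -ENOMEM;

            area = tegra_iovmm_create_vm(client, NULL,
                    EXAMPLE_PAGES << PAGE_SHIFT, pgprot_kernel);
            if (!area) {
                    tegra_iovmm_free_client(client);
                    return -ENOMEM;
            }

            /* populate every page of the reserved region */
            for (i = 0, va = area->iovm_start; i < EXAMPLE_PAGES;
                 i++, va += PAGE_SIZE)
                    tegra_iovmm_vm_insert_pfn(area, va, page_to_pfn(pages[i]));

            /* area->iovm_start can now be handed to a DMA engine, with the
             * transfers bracketed by tegra_iovmm_client_lock()/_unlock();
             * tegra_iovmm_free_vm() and tegra_iovmm_free_client() release
             * the region and the client when no longer needed. */
            return 0;
    }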
diff --git a/arch/arm/mach-tegra/iovmm-gart.c b/arch/arm/mach-tegra/iovmm-gart.c
new file mode 100644
index 000000000000..d95eed9d9c61
--- /dev/null
+++ b/arch/arm/mach-tegra/iovmm-gart.c
@@ -0,0 +1,370 @@
+/*
+ * arch/arm/mach-tegra/iovmm-gart.c
+ *
+ * Tegra I/O VMM implementation for GART devices in Tegra and Tegra 2 series
+ * systems-on-a-chip.
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+#include <asm/cacheflush.h>
+#include "mach/iovmm.h"
+#include "nvrm_drf.h"
+
+#if defined(CONFIG_ARCH_TEGRA_2x_SOC)
+#include "ap20/armc.h"
+#elif defined(CONFIG_ARCH_TEGRA_1x_SOC)
+#include "ap15/armc.h"
+#else
+#error "Unknown Tegra chip family!"
+#endif
+
+#define VMM_NAME "iovmm-gart"
+#define DRIVER_NAME "tegra_gart"
+
+#define GART_PAGE_SHIFT (12)
+#define GART_PAGE_MASK (~((1<<GART_PAGE_SHIFT)-1))
+
+struct gart_device {
+ void __iomem *regs;
+ u32 *savedata;
+ u32 page_count; /* total remappable size */
+ tegra_iovmm_addr_t iovmm_base; /* offset to apply to vmm_area */
+ spinlock_t pte_lock;
+ struct tegra_iovmm_device iovmm;
+ struct tegra_iovmm_domain domain;
+ bool enable;
+ bool needs_barrier; /* emulator WAR */
+};
+
+static int gart_map(struct tegra_iovmm_device *, struct tegra_iovmm_area *);
+static void gart_unmap(struct tegra_iovmm_device *,
+ struct tegra_iovmm_area *, bool);
+static void gart_map_pfn(struct tegra_iovmm_device *,
+ struct tegra_iovmm_area *, tegra_iovmm_addr_t, unsigned long);
+static struct tegra_iovmm_domain *gart_alloc_domain(
+ struct tegra_iovmm_device *, struct tegra_iovmm_client *);
+
+static int __init gart_probe(struct platform_device *);
+static int __devexit gart_remove(struct platform_device *);
+static int gart_suspend(struct platform_device *, pm_message_t);
+static int gart_resume(struct platform_device *);
+
+
+static struct tegra_iovmm_device_ops tegra_iovmm_gart_ops = {
+ .map = gart_map,
+ .unmap = gart_unmap,
+ .map_pfn = gart_map_pfn,
+ .alloc_domain = gart_alloc_domain,
+};
+
+static struct platform_driver tegra_iovmm_gart_drv = {
+ .probe = gart_probe,
+ .remove = gart_remove,
+ .suspend = gart_suspend,
+ .resume = gart_resume,
+ .driver = {
+ .name = DRIVER_NAME,
+ },
+};
+
+#define gpfn_to_gart(_g, _gpfn) (((_g)->iovmm_base>>GART_PAGE_SHIFT) + (_gpfn))
+
+static int gart_suspend(struct platform_device *pdev, pm_message_t state)
+{
+ struct gart_device *gart = platform_get_drvdata(pdev);
+ unsigned int i;
+
+ if (!gart)
+ return -ENODEV;
+
+ if (!gart->enable)
+ return 0;
+
+ spin_lock(&gart->pte_lock);
+ for (i=0; i<gart->page_count; i++) {
+ u32 reg;
+ reg = NV_DRF_NUM(MC, GART_ENTRY_ADDR,
+ GART_ENTRY_ADDR_TABLE_ADDR, gpfn_to_gart(gart,i));
+ writel(reg, gart->regs + MC_GART_ENTRY_ADDR_0);
+ gart->savedata[i] = readl(gart->regs + MC_GART_ENTRY_DATA_0);
+ }
+ spin_unlock(&gart->pte_lock);
+ return 0;
+}
+
+static int gart_resume(struct platform_device *pdev)
+{
+ struct gart_device *gart = platform_get_drvdata(pdev);
+ unsigned int i;
+ u32 reg;
+
+ if (!gart || (gart->enable && !gart->savedata))
+ return -ENODEV;
+
+ if (!gart->enable)
+ return 0;
+
+ spin_lock(&gart->pte_lock);
+ for (i=0; i<gart->page_count; i++) {
+ reg = NV_DRF_NUM(MC, GART_ENTRY_ADDR,
+ GART_ENTRY_ADDR_TABLE_ADDR, gpfn_to_gart(gart, i));
+ writel(reg, gart->regs + MC_GART_ENTRY_ADDR_0);
+ writel(gart->savedata[i], gart->regs + MC_GART_ENTRY_DATA_0);
+ }
+ reg = NV_DRF_DEF(MC, GART_CONFIG, GART_ENABLE, ENABLE);
+ writel(reg, gart->regs + MC_GART_CONFIG_0);
+ spin_unlock(&gart->pte_lock);
+ vfree(gart->savedata);
+ gart->savedata = NULL;
+
+ return 0;
+}
+
+static int __devexit gart_remove(struct platform_device *pdev)
+{
+ struct gart_device *gart = platform_get_drvdata(pdev);
+
+ if (!gart)
+ return 0;
+
+ if (gart->enable) {
+ u32 reg;
+ reg = NV_DRF_DEF(MC, GART_CONFIG, GART_ENABLE, DISABLE);
+ writel(reg, gart->regs + MC_GART_CONFIG_0);
+ gart->enable = 0;
+ }
+ platform_set_drvdata(pdev, NULL);
+ tegra_iovmm_unregister(&gart->iovmm);
+ if (gart->savedata)
+ vfree(gart->savedata);
+ if (gart->regs)
+ iounmap(gart->regs);
+ kfree(gart);
+ return 0;
+}
+
+static int __init gart_probe(struct platform_device *pdev)
+{
+ struct gart_device *gart = NULL;
+ struct resource *res, *res_remap;
+ void __iomem *gart_regs = NULL;
+ u32 reg;
+ unsigned int i;
+ int e;
+
+ if (!pdev) {
+ pr_err(DRIVER_NAME ": platform_device required\n");
+ return -ENODEV;
+ }
+
+ if (PAGE_SHIFT != GART_PAGE_SHIFT) {
+ pr_err(DRIVER_NAME ": GART and CPU page size must match\n");
+ return -ENXIO;
+ }
+
+ /* the GART memory aperture is required */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ res_remap = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+
+ if (!res || !res_remap) {
+ pr_err(DRIVER_NAME ": GART memory aperture expected\n");
+ return -ENXIO;
+ }
+ gart = kzalloc(sizeof(*gart), GFP_KERNEL);
+ if (!gart) {
+ pr_err(DRIVER_NAME ": failed to allocate tegra_iovmm_device\n");
+ e = -ENOMEM;
+ goto fail;
+ }
+
+ gart_regs = ioremap_wc(res->start, res->end - res->start + 1);
+ if (!gart_regs) {
+ pr_err(DRIVER_NAME ": failed to remap GART registers\n");
+ e = -ENXIO;
+ goto fail;
+ }
+
+ gart->iovmm.name = VMM_NAME;
+ gart->iovmm.ops = &tegra_iovmm_gart_ops;
+ gart->iovmm.pgsize_bits = GART_PAGE_SHIFT;
+ spin_lock_init(&gart->pte_lock);
+
+ platform_set_drvdata(pdev, gart);
+
+ e = tegra_iovmm_register(&gart->iovmm);
+ if (e) goto fail;
+
+ e = tegra_iovmm_domain_init(&gart->domain, &gart->iovmm,
+ (tegra_iovmm_addr_t)res_remap->start,
+ (tegra_iovmm_addr_t)res_remap->end+1);
+ if (e) goto fail;
+
+ gart->regs = gart_regs;
+ gart->iovmm_base = (tegra_iovmm_addr_t)res_remap->start;
+ gart->page_count = res_remap->end - res_remap->start + 1;
+ gart->page_count >>= GART_PAGE_SHIFT;
+
+ gart->savedata = vmalloc(sizeof(u32)*gart->page_count);
+ if (!gart->savedata) {
+ pr_err(DRIVER_NAME ": failed to allocate context save area\n");
+ e = -ENOMEM;
+ goto fail;
+ }
+
+ spin_lock(&gart->pte_lock);
+ for (i=0; i<gart->page_count; i++) {
+ reg = NV_DRF_NUM(MC, GART_ENTRY_ADDR,
+ GART_ENTRY_ADDR_TABLE_ADDR, gpfn_to_gart(gart, i));
+ writel(reg, gart->regs + MC_GART_ENTRY_ADDR_0);
+ writel(0, gart->regs + MC_GART_ENTRY_DATA_0);
+ }
+ reg = NV_DRF_DEF(MC, GART_CONFIG, GART_ENABLE, ENABLE);
+ writel(reg, gart->regs + MC_GART_CONFIG_0);
+ spin_unlock(&gart->pte_lock);
+ gart->enable = 1;
+ return 0;
+
+fail:
+ if (gart_regs)
+ iounmap(gart_regs);
+ if (gart && gart->savedata)
+ vfree(gart->savedata);
+ if (gart)
+ kfree(gart);
+ return e;
+}
+
+static int __devinit gart_init(void)
+{
+ return platform_driver_register(&tegra_iovmm_gart_drv);
+}
+
+static void __exit gart_exit(void)
+{
+ platform_driver_unregister(&tegra_iovmm_gart_drv);
+}
+
+#define GART_PTE(_valid, _pfn) \
+ (NV_DRF_NUM(MC,GART_ENTRY_DATA,GART_ENTRY_DATA_PHYS_ADDR_VALID,(_valid))|\
+ NV_DRF_NUM(MC,GART_ENTRY_DATA,GART_ENTRY_DATA_PHYS_ADDR,\
+ ((_pfn)<<PAGE_SHIFT)>>GART_PAGE_SHIFT))
+
+
+static int gart_map(struct tegra_iovmm_device *dev,
+ struct tegra_iovmm_area *iovma)
+{
+ struct gart_device *gart = container_of(dev, struct gart_device, iovmm);
+ u32 gart_page, count;
+ unsigned int i;
+
+ gart_page = iovma->iovm_start >> GART_PAGE_SHIFT;
+ count = iovma->iovm_length >> GART_PAGE_SHIFT;
+
+ for (i=0; i<count; i++) {
+ unsigned long pfn;
+ u32 reg;
+
+ pfn = iovma->ops->lock_makeresident(iovma, i<<PAGE_SHIFT);
+ if (!pfn_valid(pfn))
+ goto fail;
+
+ spin_lock(&gart->pte_lock);
+ /* gpfn translation not needed, since iovm_start already
+ * includes the offset */
+ reg = NV_DRF_NUM(MC, GART_ENTRY_ADDR,
+ GART_ENTRY_ADDR_TABLE_ADDR, gart_page + i);
+ writel(reg, gart->regs + MC_GART_ENTRY_ADDR_0);
+ reg = GART_PTE(1,pfn);
+ writel(reg, gart->regs + MC_GART_ENTRY_DATA_0);
+ if (unlikely(gart->needs_barrier))
+ reg = readl(gart->regs + MC_GART_ENTRY_DATA_0);
+ spin_unlock(&gart->pte_lock);
+ }
+
+ dmb();
+ outer_sync();
+ return 0;
+
+fail:
+ while (i--) {
+ u32 reg;
+ iovma->ops->release(iovma, i<<PAGE_SHIFT);
+ spin_lock(&gart->pte_lock);
+ reg = NV_DRF_NUM(MC, GART_ENTRY_ADDR,
+ GART_ENTRY_ADDR_TABLE_ADDR, gart_page + i);
+ writel(reg, gart->regs + MC_GART_ENTRY_ADDR_0);
+ writel(0, gart->regs + MC_GART_ENTRY_DATA_0);
+ spin_unlock(&gart->pte_lock);
+ }
+
+ return -ENOMEM;
+}
+
+static void gart_unmap(struct tegra_iovmm_device *dev,
+ struct tegra_iovmm_area *iovma, bool decommit)
+{
+ struct gart_device *gart = container_of(dev, struct gart_device, iovmm);
+ u32 gart_page;
+ unsigned int i;
+
+ spin_lock(&gart->pte_lock);
+ for (i=0, gart_page=iovma->iovm_start;
+ gart_page<iovma->iovm_start + iovma->iovm_length;
+ gart_page+=(1<<GART_PAGE_SHIFT), i++) {
+ if (iovma->ops && iovma->ops->release)
+ iovma->ops->release(iovma, i<<PAGE_SHIFT);
+
+ writel(gart_page, gart->regs + MC_GART_ENTRY_ADDR_0);
+ writel(0, gart->regs + MC_GART_ENTRY_DATA_0);
+ }
+ spin_unlock(&gart->pte_lock);
+ dmb();
+ outer_sync();
+}
+
+static void gart_map_pfn(struct tegra_iovmm_device *dev,
+ struct tegra_iovmm_area *iovma, tegra_iovmm_addr_t offs,
+ unsigned long pfn)
+{
+ struct gart_device *gart = container_of(dev, struct gart_device, iovmm);
+
+ BUG_ON((pfn<<PAGE_SHIFT)>=0x40000000ul);
+ spin_lock(&gart->pte_lock);
+ writel(offs, gart->regs + MC_GART_ENTRY_ADDR_0);
+ writel(GART_PTE(1,pfn), gart->regs + MC_GART_ENTRY_DATA_0);
+ (void)readl(gart->regs + MC_GART_ENTRY_DATA_0);
+ spin_unlock(&gart->pte_lock);
+ dmb();
+ outer_sync();
+}
+
+static struct tegra_iovmm_domain *gart_alloc_domain(
+ struct tegra_iovmm_device *dev, struct tegra_iovmm_client *client)
+{
+ struct gart_device *gart = container_of(dev, struct gart_device, iovmm);
+ return &gart->domain;
+}
+
+module_init(gart_init);
+module_exit(gart_exit);
diff --git a/arch/arm/mach-tegra/iovmm.c b/arch/arm/mach-tegra/iovmm.c
new file mode 100644
index 000000000000..a2f99806946d
--- /dev/null
+++ b/arch/arm/mach-tegra/iovmm.c
@@ -0,0 +1,712 @@
+/*
+ * arch/arm/mach-tegra/iovmm.c
+ *
+ * Tegra I/O VM manager
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include "mach/iovmm.h"
+#include "nvrm_drf.h"
+
+/* after the best-fit block is located, the remaining pages not needed for
+ * the allocation will be split into a new free block if the number of
+ * remaining pages is >= MIN_SPLIT_PAGE.
+ */
+#define MIN_SPLIT_PAGE (4)
+#define MIN_SPLIT_BYTES(_d) (MIN_SPLIT_PAGE<<(_d)->dev->pgsize_bits)
+
+#define iovmm_start(_b) ((_b)->vm_area.iovm_start)
+#define iovmm_length(_b) ((_b)->vm_area.iovm_length)
+#define iovmm_end(_b) (iovmm_start(_b) + iovmm_length(_b))
+
+/* flags for the block */
+#define BK_free 0 /* indicates free mappings */
+#define BK_map_dirty 1 /* used by demand-loaded mappings */
+
+/* flags for the client */
+#define CL_locked 0
+
+/* flags for the domain */
+#define DM_map_dirty 0
+
+struct tegra_iovmm_block {
+ struct tegra_iovmm_area vm_area;
+ atomic_t ref;
+ unsigned long flags;
+ unsigned long poison;
+ struct rb_node free_node;
+ struct rb_node all_node;
+};
+
+struct iovmm_share_group {
+ const char *name;
+ struct tegra_iovmm_domain *domain;
+ struct list_head client_list;
+ struct list_head group_list;
+ spinlock_t lock;
+};
+
+static LIST_HEAD(iovmm_devices);
+static LIST_HEAD(iovmm_groups);
+static DEFINE_MUTEX(iovmm_list_lock);
+
+static tegra_iovmm_addr_t iovmm_align_up(struct tegra_iovmm_device *dev,
+ tegra_iovmm_addr_t addr)
+{
+ addr += (1<<dev->pgsize_bits);
+ addr--;
+ addr &= ~((1<<dev->pgsize_bits)-1);
+ return addr;
+}
+
+static tegra_iovmm_addr_t iovmm_align_down(struct tegra_iovmm_device *dev,
+ tegra_iovmm_addr_t addr)
+{
+ addr &= ~((1<<dev->pgsize_bits)-1);
+ return addr;
+}
+
+#define iovmprint(fmt, arg...) snprintf(page+len, count-len, fmt, ## arg)
+
+static void tegra_iovmm_block_stats(struct tegra_iovmm_domain *domain,
+ unsigned int *num_blocks, unsigned int *num_free,
+ tegra_iovmm_addr_t *total, tegra_iovmm_addr_t *total_free,
+ tegra_iovmm_addr_t *max_free)
+{
+ struct rb_node *n;
+ struct tegra_iovmm_block *b;
+
+ *num_blocks = 0;
+ *num_free = 0;
+ *total = (tegra_iovmm_addr_t)0;
+ *total_free = (tegra_iovmm_addr_t)0;
+ *max_free = (tegra_iovmm_addr_t)0;
+
+ spin_lock(&domain->block_lock);
+ n = rb_first(&domain->all_blocks);
+ while (n) {
+ b = rb_entry(n, struct tegra_iovmm_block, all_node);
+ n = rb_next(n);
+ (*num_blocks)++;
+ (*total) += iovmm_length(b);
+ if (test_bit(BK_free, &b->flags)) {
+ (*num_free)++;
+ (*total_free) += iovmm_length(b);
+ (*max_free) = max_t(tegra_iovmm_addr_t,
+ (*max_free), iovmm_length(b));
+ }
+ }
+ spin_unlock(&domain->block_lock);
+}
+
+static int tegra_iovmm_read_proc(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct iovmm_share_group *grp;
+ tegra_iovmm_addr_t max_free, total_free, total;
+ unsigned int num, num_free;
+
+ int len = 0;
+
+ mutex_lock(&iovmm_list_lock);
+ len += iovmprint("\ngroups\n");
+ if (list_empty(&iovmm_groups))
+ len += iovmprint("\t<empty>\n");
+ else {
+ list_for_each_entry(grp, &iovmm_groups, group_list) {
+ len += iovmprint("\t%s (device: %s)\n",
+ (grp->name) ? grp->name : "<unnamed>",
+ grp->domain->dev->name);
+ tegra_iovmm_block_stats(grp->domain, &num,
+ &num_free, &total, &total_free, &max_free);
+ total >>= 10;
+ total_free >>= 10;
+ max_free >>= 10;
+ len += iovmprint("\t\tsize: %uKiB free: %uKiB "
+ "largest: %uKiB (%u free / %u total blocks)\n",
+ total, total_free, max_free, num_free, num);
+ }
+ }
+ mutex_unlock(&iovmm_list_lock);
+
+ *eof = 1;
+ return len;
+}
+
+static void iovmm_block_put(struct tegra_iovmm_block *b)
+{
+ BUG_ON(b->poison);
+ BUG_ON(atomic_read(&b->ref)==0);
+ if (!atomic_dec_return(&b->ref)) {
+ b->poison = 0xa5a5a5a5;
+ kfree(b);
+ }
+}
+
+static void iovmm_free_block(struct tegra_iovmm_domain *domain,
+ struct tegra_iovmm_block *block)
+{
+ struct tegra_iovmm_block *pred = NULL; /* address-order predecessor */
+ struct tegra_iovmm_block *succ = NULL; /* address-order successor */
+ struct rb_node **p;
+ struct rb_node *parent = NULL, *temp;
+ int pred_free = 0, succ_free = 0;
+
+ iovmm_block_put(block);
+
+ spin_lock(&domain->block_lock);
+ temp = rb_prev(&block->all_node);
+ if (temp)
+ pred = rb_entry(temp, struct tegra_iovmm_block, all_node);
+ temp = rb_next(&block->all_node);
+ if (temp)
+ succ = rb_entry(temp, struct tegra_iovmm_block, all_node);
+
+ if (pred) pred_free = test_bit(BK_free, &pred->flags);
+ if (succ) succ_free = test_bit(BK_free, &succ->flags);
+
+ if (pred_free && succ_free) {
+ iovmm_length(pred) += iovmm_length(block);
+ iovmm_length(pred) += iovmm_length(succ);
+ rb_erase(&block->all_node, &domain->all_blocks);
+ rb_erase(&succ->all_node, &domain->all_blocks);
+ rb_erase(&succ->free_node, &domain->free_blocks);
+ rb_erase(&pred->free_node, &domain->free_blocks);
+ iovmm_block_put(block);
+ iovmm_block_put(succ);
+ block = pred;
+ } else if (pred_free) {
+ iovmm_length(pred) += iovmm_length(block);
+ rb_erase(&block->all_node, &domain->all_blocks);
+ rb_erase(&pred->free_node, &domain->free_blocks);
+ iovmm_block_put(block);
+ block = pred;
+ } else if (succ_free) {
+ iovmm_length(block) += iovmm_length(succ);
+ rb_erase(&succ->all_node, &domain->all_blocks);
+ rb_erase(&succ->free_node, &domain->free_blocks);
+ iovmm_block_put(succ);
+ }
+
+ p = &domain->free_blocks.rb_node;
+ while (*p) {
+ struct tegra_iovmm_block *b;
+ parent = *p;
+ b = rb_entry(parent, struct tegra_iovmm_block, free_node);
+ if (iovmm_length(block) >= iovmm_length(b))
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&block->free_node, parent, p);
+ rb_insert_color(&block->free_node, &domain->free_blocks);
+ set_bit(BK_free, &block->flags);
+ spin_unlock(&domain->block_lock);
+}
+
+/* if the best-fit block is larger than the requested size, a remainder
+ * block will be created and inserted into the free list in its place.
+ * since all free blocks are stored in two trees the new block needs to be
+ * linked into both. */
+static void iovmm_split_free_block(struct tegra_iovmm_domain *domain,
+ struct tegra_iovmm_block *block, unsigned long size)
+{
+ struct rb_node **p = &domain->free_blocks.rb_node;
+ struct rb_node *parent = NULL;
+ struct tegra_iovmm_block *rem = kzalloc(sizeof(*rem), GFP_KERNEL);
+ struct tegra_iovmm_block *b;
+
+ if (!rem) return;
+
+ iovmm_start(rem) = iovmm_start(block) + size;
+ iovmm_length(rem) = iovmm_length(block) - size;
+ atomic_set(&rem->ref, 1);
+ iovmm_length(block) = size;
+
+ while (*p) {
+ parent = *p;
+ b = rb_entry(parent, struct tegra_iovmm_block, free_node);
+ if (iovmm_length(rem) >= iovmm_length(b))
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ set_bit(BK_free, &rem->flags);
+ rb_link_node(&rem->free_node, parent, p);
+ rb_insert_color(&rem->free_node, &domain->free_blocks);
+
+ p = &domain->all_blocks.rb_node;
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ b = rb_entry(parent, struct tegra_iovmm_block, all_node);
+ if (iovmm_start(rem) >= iovmm_start(b))
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&rem->all_node, parent, p);
+ rb_insert_color(&rem->all_node, &domain->all_blocks);
+}
+
+static struct tegra_iovmm_block *iovmm_alloc_block(
+ struct tegra_iovmm_domain *domain, unsigned long size)
+{
+ struct rb_node *n;
+ struct tegra_iovmm_block *b, *best;
+
+ BUG_ON(!size);
+ size = iovmm_align_up(domain->dev, size);
+ spin_lock(&domain->block_lock);
+ n = domain->free_blocks.rb_node;
+ best = NULL;
+ while (n) {
+ b = rb_entry(n, struct tegra_iovmm_block, free_node);
+ if (iovmm_length(b) < size) n = n->rb_right;
+ else if (iovmm_length(b) == size) {
+ best = b;
+ break;
+ } else {
+ best = b;
+ n = n->rb_left;
+ }
+ }
+ if (!best) {
+ spin_unlock(&domain->block_lock);
+ return NULL;
+ }
+ rb_erase(&best->free_node, &domain->free_blocks);
+ if (iovmm_length(best) >= size+MIN_SPLIT_BYTES(domain))
+ iovmm_split_free_block(domain, best, size);
+
+ clear_bit(BK_free, &best->flags);
+ atomic_inc(&best->ref);
+ spin_unlock(&domain->block_lock);
+ return best;
+}
+
+int tegra_iovmm_domain_init(struct tegra_iovmm_domain *domain,
+ struct tegra_iovmm_device *dev, tegra_iovmm_addr_t start,
+ tegra_iovmm_addr_t end)
+{
+ struct tegra_iovmm_block *b = kzalloc(sizeof(*b), GFP_KERNEL);
+ if (!b) return -ENOMEM;
+
+ domain->dev = dev;
+ atomic_set(&domain->clients, 0);
+ atomic_set(&domain->locks, 0);
+ atomic_set(&b->ref, 1);
+ spin_lock_init(&domain->block_lock);
+ init_rwsem(&domain->map_lock);
+ init_waitqueue_head(&domain->delay_lock);
+ iovmm_start(b) = iovmm_align_up(dev, start);
+ iovmm_length(b) = iovmm_align_down(dev, end) - iovmm_start(b);
+ set_bit(BK_free, &b->flags);
+ rb_link_node(&b->free_node, NULL, &domain->free_blocks.rb_node);
+ rb_insert_color(&b->free_node, &domain->free_blocks);
+ rb_link_node(&b->all_node, NULL, &domain->all_blocks.rb_node);
+ rb_insert_color(&b->all_node, &domain->all_blocks);
+ return 0;
+}
+
+struct tegra_iovmm_area *tegra_iovmm_create_vm(
+ struct tegra_iovmm_client *client, struct tegra_iovmm_area_ops *ops,
+ unsigned long size, pgprot_t pgprot)
+{
+ struct tegra_iovmm_block *b;
+ struct tegra_iovmm_device *dev;
+
+ if (!client) return NULL;
+
+ dev = client->domain->dev;
+
+ b = iovmm_alloc_block(client->domain, size);
+ if (!b) return NULL;
+
+ b->vm_area.domain = client->domain;
+ b->vm_area.pgprot = pgprot;
+ b->vm_area.ops = ops;
+
+ down_read(&b->vm_area.domain->map_lock);
+ if (ops && !test_bit(CL_locked, &client->flags)) {
+ set_bit(BK_map_dirty, &b->flags);
+ set_bit(DM_map_dirty, &client->domain->flags);
+ } else if (ops) {
+ if (dev->ops->map(dev, &b->vm_area))
+ pr_err("%s failed to map locked domain\n", __func__);
+ }
+ up_read(&b->vm_area.domain->map_lock);
+
+ return &b->vm_area;
+}
+
+void tegra_iovmm_vm_insert_pfn(struct tegra_iovmm_area *area,
+ tegra_iovmm_addr_t vaddr, unsigned long pfn)
+{
+ struct tegra_iovmm_device *dev = area->domain->dev;
+ BUG_ON(vaddr & ((1<<dev->pgsize_bits)-1));
+ BUG_ON(vaddr >= area->iovm_start + area->iovm_length);
+ BUG_ON(vaddr < area->iovm_start);
+ BUG_ON(area->ops);
+
+ dev->ops->map_pfn(dev, area, vaddr, pfn);
+}
+
+void tegra_iovmm_zap_vm(struct tegra_iovmm_area *vm)
+{
+ struct tegra_iovmm_block *b;
+ struct tegra_iovmm_device *dev;
+
+ b = container_of(vm, struct tegra_iovmm_block, vm_area);
+ dev = vm->domain->dev;
+ /* if the vm area mapping was deferred, don't unmap it since
+ * the memory for the page tables it uses may not be allocated */
+ down_read(&vm->domain->map_lock);
+ if (!test_and_clear_bit(BK_map_dirty, &b->flags))
+ dev->ops->unmap(dev, vm, false);
+ up_read(&vm->domain->map_lock);
+}
+
+void tegra_iovmm_unzap_vm(struct tegra_iovmm_area *vm)
+{
+ struct tegra_iovmm_block *b;
+ struct tegra_iovmm_device *dev;
+
+ b = container_of(vm, struct tegra_iovmm_block, vm_area);
+ dev = vm->domain->dev;
+ if (!vm->ops) return;
+
+ down_read(&vm->domain->map_lock);
+ if (vm->ops) {
+ if (atomic_read(&vm->domain->locks))
+ dev->ops->map(dev, vm);
+ else {
+ set_bit(BK_map_dirty, &b->flags);
+ set_bit(DM_map_dirty, &vm->domain->flags);
+ }
+ }
+ up_read(&vm->domain->map_lock);
+}
+
+void tegra_iovmm_free_vm(struct tegra_iovmm_area *vm)
+{
+ struct tegra_iovmm_block *b;
+ struct tegra_iovmm_device *dev;
+ struct tegra_iovmm_domain *domain;
+
+ if (!vm) return;
+
+ b = container_of(vm, struct tegra_iovmm_block, vm_area);
+ domain = vm->domain;
+ dev = vm->domain->dev;
+ down_read(&domain->map_lock);
+ if (!test_and_clear_bit(BK_map_dirty, &b->flags))
+ dev->ops->unmap(dev, vm, true);
+ iovmm_free_block(domain, b);
+ up_read(&domain->map_lock);
+}
+
+struct tegra_iovmm_area *tegra_iovmm_area_get(struct tegra_iovmm_area *vm)
+{
+ struct tegra_iovmm_block *b;
+
+ BUG_ON(!vm);
+ b = container_of(vm, struct tegra_iovmm_block, vm_area);
+
+ atomic_inc(&b->ref);
+ return &b->vm_area;
+}
+
+void tegra_iovmm_area_put(struct tegra_iovmm_area *vm)
+{
+ struct tegra_iovmm_block *b;
+ BUG_ON(!vm);
+ b = container_of(vm, struct tegra_iovmm_block, vm_area);
+ iovmm_block_put(b);
+}
+
+struct tegra_iovmm_area *tegra_iovmm_find_area_get(
+ struct tegra_iovmm_client *client, tegra_iovmm_addr_t addr)
+{
+ struct rb_node *n;
+ struct tegra_iovmm_block *b = NULL;
+
+ if (!client) return NULL;
+
+ spin_lock(&client->domain->block_lock);
+ n = client->domain->all_blocks.rb_node;
+
+ while (n) {
+ b = rb_entry(n, struct tegra_iovmm_block, all_node);
+ if ((iovmm_start(b) <= addr) && (iovmm_end(b) >= addr)) {
+ if (test_bit(BK_free, &b->flags)) b = NULL;
+ break;
+ }
+ if (addr > iovmm_start(b))
+ n = n->rb_right;
+ else
+ n = n->rb_left;
+ b = NULL;
+ }
+ if (b) atomic_inc(&b->ref);
+ spin_unlock(&client->domain->block_lock);
+ if (!b) return NULL;
+ return &b->vm_area;
+}
+
+static int _iovmm_client_lock(struct tegra_iovmm_client *client)
+{
+ struct tegra_iovmm_device *dev;
+ struct tegra_iovmm_domain *domain;
+ int v;
+
+ if (unlikely(!client)) return -ENODEV;
+ if (unlikely(test_bit(CL_locked, &client->flags))) {
+ pr_err("attempting to relock client %s\n", client->name);
+ return 0;
+ }
+
+ domain = client->domain;
+ dev = domain->dev;
+ down_write(&domain->map_lock);
+ v = atomic_inc_return(&domain->locks);
+ /* if the device doesn't export the lock_domain function, the device
+ * must guarantee that any valid domain will be locked. */
+ if (v==1 && dev->ops->lock_domain) {
+ if (dev->ops->lock_domain(dev, domain)) {
+ atomic_dec(&domain->locks);
+ up_write(&domain->map_lock);
+ return -EAGAIN;
+ }
+ }
+ if (test_and_clear_bit(DM_map_dirty, &domain->flags)) {
+ struct rb_node *n;
+ struct tegra_iovmm_block *b;
+
+ spin_lock(&domain->block_lock);
+ n = rb_first(&domain->all_blocks);
+ while (n) {
+ b = rb_entry(n, struct tegra_iovmm_block, all_node);
+ n = rb_next(n);
+ if (test_bit(BK_free, &b->flags))
+ continue;
+
+ if (test_and_clear_bit(BK_map_dirty, &b->flags)) {
+ if (!b->vm_area.ops) {
+ pr_err("%s: vm_area ops must exist for lazy maps\n", __func__);
+ continue;
+ }
+ dev->ops->map(dev, &b->vm_area);
+ }
+ }
+ }
+ set_bit(CL_locked, &client->flags);
+ up_write(&domain->map_lock);
+ return 0;
+}
+
+int tegra_iovmm_client_trylock(struct tegra_iovmm_client *client)
+{
+ return _iovmm_client_lock(client);
+}
+
+int tegra_iovmm_client_lock(struct tegra_iovmm_client *client)
+{
+ int ret;
+
+ if (!client) return -ENODEV;
+
+ ret = wait_event_interruptible(client->domain->delay_lock,
+ _iovmm_client_lock(client)!=-EAGAIN);
+
+ if (ret==-ERESTARTSYS) return -EINTR;
+
+ return ret;
+}
+
+void tegra_iovmm_client_unlock(struct tegra_iovmm_client *client)
+{
+ struct tegra_iovmm_device *dev;
+ struct tegra_iovmm_domain *domain;
+ int do_wake = 0;
+
+ if (!client) return;
+
+ if (!test_and_clear_bit(CL_locked, &client->flags)) {
+ pr_err("unlocking unlocked client %s\n", client->name);
+ return;
+ }
+
+ domain = client->domain;
+ dev = domain->dev;
+ down_write(&domain->map_lock);
+ if (!atomic_dec_return(&client->domain->locks)) {
+ if (dev->ops->unlock_domain)
+ dev->ops->unlock_domain(dev, domain);
+ do_wake = 1;
+ }
+ up_write(&domain->map_lock);
+ if (do_wake) wake_up(&domain->delay_lock);
+}
+
+size_t tegra_iovmm_get_vm_size(struct tegra_iovmm_client *client)
+{
+ struct tegra_iovmm_domain *domain;
+ struct rb_node *n;
+ struct tegra_iovmm_block *b;
+ size_t size = 0;
+
+ if (!client) return 0;
+
+ domain = client->domain;
+
+ spin_lock(&domain->block_lock);
+ n = rb_first(&domain->all_blocks);
+ while (n) {
+ b = rb_entry(n, struct tegra_iovmm_block, all_node);
+ n = rb_next(n);
+ size += iovmm_length(b);
+ }
+ spin_unlock(&domain->block_lock);
+
+ return size;
+}
+
+void tegra_iovmm_free_client(struct tegra_iovmm_client *client)
+{
+ struct tegra_iovmm_device *dev;
+ if (!client) return;
+
+ BUG_ON(!client->domain || !client->domain->dev);
+
+ dev = client->domain->dev;
+
+ if (test_and_clear_bit(CL_locked, &client->flags)) {
+ pr_err("freeing locked client %s\n", client->name);
+ if (!atomic_dec_return(&client->domain->locks)) {
+ down_write(&client->domain->map_lock);
+ if (dev->ops->unlock_domain)
+ dev->ops->unlock_domain(dev, client->domain);
+ up_write(&client->domain->map_lock);
+ wake_up(&client->domain->delay_lock);
+ }
+ }
+ mutex_lock(&iovmm_list_lock);
+ if (!atomic_dec_return(&client->domain->clients))
+ if (dev->ops->free_domain)
+ dev->ops->free_domain(dev, client->domain);
+ list_del(&client->list);
+ if (list_empty(&client->group->client_list)) {
+ list_del(&client->group->group_list);
+ if (client->group->name) kfree(client->group->name);
+ kfree(client->group);
+ }
+ kfree(client->name);
+ kfree(client);
+ mutex_unlock(&iovmm_list_lock);
+}
+
+struct tegra_iovmm_client *tegra_iovmm_alloc_client(const char *name,
+ const char *share_group)
+{
+ struct tegra_iovmm_client *c = kzalloc(sizeof(*c), GFP_KERNEL);
+ struct iovmm_share_group *grp = NULL;
+ struct tegra_iovmm_device *dev;
+
+ if (!c) return NULL;
+ c->name = kstrdup(name, GFP_KERNEL);
+ if (!c->name) goto fail;
+
+ mutex_lock(&iovmm_list_lock);
+ if (share_group) {
+ list_for_each_entry(grp, &iovmm_groups, group_list) {
+ if (grp->name && !strcmp(grp->name, share_group))
+ break;
+ }
+ }
+ if (!grp || strcmp(grp->name, share_group)) {
+ grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+ if (!grp) goto fail_lock;
+ grp->name = (share_group) ? kstrdup(share_group, GFP_KERNEL) : NULL;
+ if (share_group && !grp->name) {
+ kfree(grp);
+ goto fail_lock;
+ }
+ list_for_each_entry(dev, &iovmm_devices, list) {
+ grp->domain = dev->ops->alloc_domain(dev, c);
+ if (grp->domain) break;
+ }
+ if (!grp->domain) {
+ pr_err("%s: alloc_domain failed for %s\n",
+ __func__, c->name);
+ dump_stack();
+ if (grp->name) kfree(grp->name);
+ kfree(grp);
+ grp = NULL;
+ goto fail_lock;
+ }
+ spin_lock_init(&grp->lock);
+ INIT_LIST_HEAD(&grp->client_list);
+ list_add_tail(&grp->group_list, &iovmm_groups);
+ }
+
+ atomic_inc(&grp->domain->clients);
+ c->group = grp;
+ c->domain = grp->domain;
+ spin_lock(&grp->lock);
+ list_add_tail(&c->list, &grp->client_list);
+ spin_unlock(&grp->lock);
+ mutex_unlock(&iovmm_list_lock);
+ return c;
+
+fail_lock:
+ mutex_unlock(&iovmm_list_lock);
+fail:
+ if (c) {
+ if (c->name) kfree(c->name);
+ kfree(c);
+ }
+ return NULL;
+}
+
+int tegra_iovmm_register(struct tegra_iovmm_device *dev)
+{
+ BUG_ON(!dev);
+ mutex_lock(&iovmm_list_lock);
+ if (list_empty(&iovmm_devices)) {
+ create_proc_read_entry("iovmminfo", S_IRUGO, NULL,
+ tegra_iovmm_read_proc, NULL);
+ }
+ list_add_tail(&dev->list, &iovmm_devices);
+ mutex_unlock(&iovmm_list_lock);
+ printk("%s: added %s\n", __func__, dev->name);
+ return 0;
+}
+
+int tegra_iovmm_unregister(struct tegra_iovmm_device *dev)
+{
+ mutex_lock(&iovmm_list_lock);
+ list_del(&dev->list);
+ mutex_unlock(&iovmm_list_lock);
+ return 0;
+}
diff --git a/arch/arm/mach-tegra/iovmm.txt b/arch/arm/mach-tegra/iovmm.txt
new file mode 100644
index 000000000000..b71b73557e0b
--- /dev/null
+++ b/arch/arm/mach-tegra/iovmm.txt
@@ -0,0 +1,202 @@
+Tegra I/O Virtual Memory Manager Interface
+==========================================
+
+The Tegra IOVMM is an interface to allow device drivers and subsystems in
+the kernel to manage the virtual memory spaces visible to I/O devices.
+
+The interface has been designed to be scalable to allow for I/O virtual
+memory hardware which exists in one or more limited apertures of the address
+space (e.g., a small aperture in physical address space which can perform
+MMU-like remapping) up to complete virtual addressing with multiple
+address spaces and memory protection.
+
+The interface has been designed to be similar to the Linux virtual memory
+system; however, operations which would be difficult to implement or
+nonsensical for DMA devices (e.g., copy-on-write) are not present, and
+APIs have been added to allow for management of multiple simultaneous
+active address spaces.
+
+The API is broken into four principal objects: areas, clients, domains and
+devices.
+
+
+Areas
+=====
+
+An area is a contiguous region of the virtual address space which can be
+filled with virtual-to-physical translations (and, optionally, protection
+attributes). The virtual address of the area can be queried and used for
+DMA operations by the client which created it.
+
+As with the Linux vm_area structures, it is the responsibility of whichever
+code creates an area to ensure that it is populated with appropriate
+translations.
+
+
+Domains
+=======
+
+A domain in the IOVMM system is similar to a process in a standard CPU
+virtual memory system; it represents the entire range of virtual addresses
+which may be allocated and used for translation. Depending on hardware
+capabilities, one or more domains may be resident and available for
+translation. IOVMM areas are allocated from IOVMM domains.
+
+Whenever a DMA operation is performed to or from an IOVMM area, its parent
+domain must be made resident prior to commencing the operation.
+
+
+Clients
+=======
+
+I/O VMM clients represent any entity which needs to be able to allocate
+and map system memory into I/O virtual space. Clients are created by name
+and may be created as part of a "share group," where all clients created
+in the same share group will observe the same I/O virtual space (i.e., all
+will use the same IOVMM domain). This is similar to threads inside a process
+in the CPU virtual memory manager.
+
+The callers of the I/O VMM system are responsible for deciding on the
+granularity of client creation and share group definition; depending on the
+specific usage model expected by the caller, it may be appropriate to create
+an IOVMM client per task (if the caller represents an ioctl'able interface
+to user land), an IOVMM client per driver instance, a common IOVMM client
+for an entire bus, or a global IOVMM client for an OS subsystem (e.g., the DMA
+mapping interface).
+
+Each caller is responsible for ensuring that its IOVMM client's translation is
+resident on the system prior to performing DMA operations using the IOVMM
+addresses. This is accomplished by preceding all DMA operations for the client
+with a call to tegra_iovmm_client_lock (or tegra_iovmm_client_trylock),
+and following all operations (once complete) with a call to
+tegra_iovmm_client_unlock. In this regard, clients are cooperatively context-
+switched, and are expected to behave appropriately.
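As a sketch of this discipline (illustrative only; the DMA programming itself is elided, and the function name is a placeholder):

    #include "mach/iovmm.h"

    static int example_dma(struct tegra_iovmm_client *client,
                           struct tegra_iovmm_area *area)
    {
            int err;

            /* make the client's domain resident; may sleep and can
             * return -EINTR if the wait is interrupted */
            err = tegra_iovmm_client_lock(client);
            if (err)
                    return err;

            /*
             * ... program the DMA engine with I/O virtual addresses in
             * [area->iovm_start, area->iovm_start + area->iovm_length)
             * and wait for the transfer to complete ...
             */

            /* allow the IOVMM device to context-switch other clients */
            tegra_iovmm_client_unlock(client);
            return 0;
    }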
+
+
+Devices
+=======
+
+I/O VMM devices are the physical hardware which is responsible for performing
+the I/O virtual-to-physical translation.
+
+Devices are responsible for domain management: the mapping and unmapping
+operations needed to make translations resident in the domain (including
+any TLB shootdown or cache invalidation needed to ensure coherency), locking
+and unlocking domains as they are made resident by clients into the devices'
+address space(s), and allocating and deallocating the domain objects.
+
+Devices are responsible for the allocation and deallocation of domains to
+allow coalescing of multiple client share groups into a single domain. For
+example, if the device's hardware only allows a single address space to
+be translated system-wide, performing full flushes and invalidates of the
+translation at every client switch may be prohibitively expensive. In these
+circumstances, a legal implementation of the IOVMM interface includes
+returning the same domain for all clients on the system (regardless of
+the originally-specified share group).
+
+In this respect, a client can be assured that it will share an address space
+with all of the other clients in its share group; however, it may also share
+this address space with other clients, too.
+
+Multiple devices may be present in a system; a device should return a NULL
+domain if it is incapable of servicing the client when it is asked to
+allocate a domain.
+
+----------------------------------------------------------------------------
+
+IOVMM Client API
+================
+
+tegra_iovmm_alloc_client - Called to create a new IOVMM client object; the
+ implementation may create a new domain or return an existing one depending on
+ both the device and the share group.
+
+tegra_iovmm_free_client - Frees a client.
+
+tegra_iovmm_client_lock - Makes a client's translations resident in the IOVMM
+ device for subsequent DMA operations. May block if the device is incapable
+ of context-switching the client when it is called. Returns -EINTR if the
+ waiting thread is interrupted before the client is locked.
+
+tegra_iovmm_client_trylock - Non-blocking version of tegra_iovmm_client_lock
+
+tegra_iovmm_client_unlock - Called by clients after DMA operations on IOVMM-
+ translated addresses are complete; allows the IOVMM system to context-switch
+ the current client out of the device if needed.
+
+tegra_iovmm_create_vm - Called to allocate an IOVMM area. If lazy /
+ demand-loading of pages is desired, clients should supply a pointer to a
+ tegra_iovmm_area_ops structure providing callbacks to load, pin and unpin
+ the physical pages mapped into this region (see the sketch after this list).
+
+tegra_iovmm_get_vm_size - Called to query the total size of an IOVMM client
+
+tegra_iovmm_free_vm - Called to free an IOVMM area, releasing any pinned
+ physical pages mapped by it and decommitting any resources (memory for
+ PTEs / PDEs) required by the VM area.
+
+tegra_iovmm_vm_insert_pfn - Called to insert an exact pfn (system memory
+ physical page) into the area at a specific virtual address. Illegal to call
+ if the IOVMM area was originally created with lazy / demand-loading.
+
+tegra_iovmm_zap_vm - Called to mark all mappings in the IOVMM area as
+ invalid / no-access; the area continues to consume I/O virtual address space.
+ For lazy / demand-loaded IOVMM areas, a zapped region will not be reloaded
+ until it has been unzapped; DMA operations using the affected translations
+ may fault (if supported by the device).
+
+tegra_iovmm_unzap_vm - Called to re-enable lazy / demand-loading of pages
+ for a previously-zapped IOVMM area.
+
+tegra_iovmm_find_area_get - Called to find the IOVMM area object
+ corresponding to the specified I/O virtual address, or NULL if the address
+ is not allocated in the client's address space. Increases the reference count
+ on the IOVMM area object
+
+tegra_iovmm_area_get - Called to increase the reference count on the IOVMM
+ area object
+
+tegra_iovmm_area_put - Called to decrease the reference count on the IOVMM
+ area object
+
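As referenced from the tegra_iovmm_create_vm entry above, a sketch of the callbacks a client might supply for a lazily loaded area; the page bookkeeping here is a deliberately simple illustration, not a recommended design:

    #include <linux/gfp.h>
    #include <linux/mm.h>
    #include "mach/iovmm.h"

    #define EXAMPLE_NPAGES 16
    static struct page *example_pages[EXAMPLE_NPAGES];

    /* pin (here: allocate) the page backing 'offs' and return its pfn;
     * an invalid pfn tells the device that the operation failed */
    static unsigned long example_makeresident(struct tegra_iovmm_area *area,
                                              tegra_iovmm_addr_t offs)
    {
            unsigned int idx = offs >> PAGE_SHIFT;
            struct page *page;

            if (idx >= EXAMPLE_NPAGES)
                    return ~0ul;

            page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
            if (!page)
                    return ~0ul;

            example_pages[idx] = page;
            return page_to_pfn(page);
    }

    /* called when the page at 'offs' is unmapped from the I/O VMA */
    static void example_release(struct tegra_iovmm_area *area,
                                tegra_iovmm_addr_t offs)
    {
            unsigned int idx = offs >> PAGE_SHIFT;

            if (idx < EXAMPLE_NPAGES && example_pages[idx]) {
                    __free_page(example_pages[idx]);
                    example_pages[idx] = NULL;
            }
    }

    static struct tegra_iovmm_area_ops example_area_ops = {
            .lock_makeresident = example_makeresident,
            .release           = example_release,
    };

An area created with tegra_iovmm_create_vm(client, &example_area_ops, EXAMPLE_NPAGES << PAGE_SHIFT, pgprot_kernel) would then be populated when the owning client's domain is next made resident (or immediately, if the client is already locked).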
+
+IOVMM Device API
+================
+
+tegra_iovmm_register - Called to register a new IOVMM device with the IOVMM
+ manager
+
+tegra_iovmm_unregister - Called to remove an IOVMM device from the IOVMM
+ manager (unspecified behavior if called while a translation is active and / or
+ in-use)
+
+tegra_iovmm_domain_init - Called to initialize all of the IOVMM manager's
+ data structures (block trees, etc.) after allocating a new domain
+
+
+IOVMM Device HAL
+================
+
+map - Called to inform the device about a new lazy-mapped IOVMM area. Devices
+ may load the entire VM area when this is called, or at any time prior to
+ the completion of the first read or write operation using the translation.
+
+unmap - Called to zap or to decommit translations
+
+map_pfn - Called to insert a specific virtual-to-physical translation in the
+ IOVMM area
+
+lock_domain - Called to make a domain resident; should return 0 if the
+ domain was successfully context-switched, non-zero if the operation can
+ not be completed (e.g., all available simultaneous hardware translations are
+ locked). If the device can guarantee that every domain it allocates is
+ always usable, this function may be NULL.
+
+unlock_domain - Releases a domain from residency, allows the hardware
+ translation to be used by other domains.
+
+alloc_domain - Called to allocate a new domain; allowed to return an
+ existing domain
+
+free_domain - Called to free a domain.
+
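For reference, the overall shape of a device-side implementation, reduced to a skeleton (the working example in this patch is iovmm-gart.c); the names, function bodies and aperture bounds below are placeholders only:

    #include "mach/iovmm.h"

    static struct tegra_iovmm_domain example_domain;

    static int example_map(struct tegra_iovmm_device *dev,
                           struct tegra_iovmm_area *iovma)
    {
            /* walk the area page by page, calling
             * iovma->ops->lock_makeresident() and programming a
             * hardware translation for each returned pfn */
            return 0;
    }

    static void example_unmap(struct tegra_iovmm_device *dev,
                              struct tegra_iovmm_area *iovma, bool decommit)
    {
            /* invalidate the translations covering the area and release
             * pinned pages through iovma->ops->release() */
    }

    static void example_map_pfn(struct tegra_iovmm_device *dev,
                                struct tegra_iovmm_area *iovma,
                                tegra_iovmm_addr_t offs, unsigned long pfn)
    {
            /* program a single translation at offs */
    }

    /* single-address-space hardware: every client shares one domain,
     * as the Devices section above permits */
    static struct tegra_iovmm_domain *example_alloc_domain(
            struct tegra_iovmm_device *dev, struct tegra_iovmm_client *client)
    {
            return &example_domain;
    }

    static struct tegra_iovmm_device_ops example_ops = {
            .map          = example_map,
            .unmap        = example_unmap,
            .map_pfn      = example_map_pfn,
            .alloc_domain = example_alloc_domain,
            /* lock_domain/unlock_domain omitted: the lone domain is
             * always resident on this hypothetical hardware */
    };

    static struct tegra_iovmm_device example_dev = {
            .ops          = &example_ops,
            .name         = "example-iovmm",
            .pgsize_bits  = 12,
    };

    /* at probe time (aperture bounds are placeholders):
     *      tegra_iovmm_register(&example_dev);
     *      tegra_iovmm_domain_init(&example_domain, &example_dev,
     *                              0x58000000, 0x60000000);
     */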