author     Rohan Somvanshi <rsomvanshi@nvidia.com>   2012-02-01 16:03:06 +0530
committer  Rohan Somvanshi <rsomvanshi@nvidia.com>   2012-02-01 16:03:06 +0530
commit     13ab2df1dfed5153899b98b1417f52de0ecc70cd (patch)
tree       9b47b72d23fef02d1b489a845547106430552be9 /drivers/iommu
parent     c8932c9184978b42398fd582592315cbbf507415 (diff)
parent     ac2b520c46800dec16f3c23d0cf8759826750937 (diff)
Merge remote-tracking branch 'origin/dev/k3.3-rc1-iommu' into android-tegra-nv-3.1
Change-Id: I9001bb291779f107bbcb593d48f9f0f734074d0e
Diffstat (limited to 'drivers/iommu')
-rw-r--r--  drivers/iommu/Kconfig               57
-rw-r--r--  drivers/iommu/Makefile               9
-rw-r--r--  drivers/iommu/amd_iommu.c          885
-rw-r--r--  drivers/iommu/amd_iommu_init.c     133
-rw-r--r--  drivers/iommu/amd_iommu_proto.h     24
-rw-r--r--  drivers/iommu/amd_iommu_types.h    118
-rw-r--r--  drivers/iommu/amd_iommu_v2.c       994
-rw-r--r--  drivers/iommu/dmar.c               246
-rw-r--r--  drivers/iommu/intel-iommu.c        290
-rw-r--r--  drivers/iommu/intr_remapping.c      93
-rw-r--r--  drivers/iommu/iommu.c              279
-rw-r--r--  drivers/iommu/msm_iommu.c           22
-rw-r--r--  drivers/iommu/omap-iommu-debug.c   419
-rw-r--r--  drivers/iommu/omap-iommu.c        1239
-rw-r--r--  drivers/iommu/omap-iovmm.c         747
15 files changed, 5187 insertions, 368 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index a2c55bf57903..b394c6c5fbe7 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -34,7 +34,9 @@ config AMD_IOMMU
bool "AMD IOMMU support"
select SWIOTLB
select PCI_MSI
- select PCI_IOV
+ select PCI_ATS
+ select PCI_PRI
+ select PCI_PASID
select IOMMU_API
depends on X86_64 && PCI && ACPI
---help---
@@ -58,11 +60,24 @@ config AMD_IOMMU_STATS
information to userspace via debugfs.
If unsure, say N.
+config AMD_IOMMU_V2
+ tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)"
+ depends on AMD_IOMMU && PROFILING && EXPERIMENTAL
+ select MMU_NOTIFIER
+ ---help---
+ This option enables support for the AMD IOMMUv2 features of the IOMMU
+ hardware. Select this option if you want to use devices that support
+ the PCI PRI and PASID interface.
+
# Intel IOMMU support
-config DMAR
- bool "Support for DMA Remapping Devices"
+config DMAR_TABLE
+ bool
+
+config INTEL_IOMMU
+ bool "Support for Intel IOMMU using DMA Remapping Devices"
depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
select IOMMU_API
+ select DMAR_TABLE
help
DMA remapping (DMAR) devices support enables independent address
translations for Direct Memory Access (DMA) from devices.
@@ -70,18 +85,18 @@ config DMAR
and include PCI device scope covered by these DMA
remapping devices.
-config DMAR_DEFAULT_ON
+config INTEL_IOMMU_DEFAULT_ON
def_bool y
- prompt "Enable DMA Remapping Devices by default"
- depends on DMAR
+ prompt "Enable Intel DMA Remapping Devices by default"
+ depends on INTEL_IOMMU
help
Selecting this option will enable a DMAR device at boot time if
one is found. If this option is not selected, DMAR support can
be enabled by passing intel_iommu=on to the kernel.
-config DMAR_BROKEN_GFX_WA
+config INTEL_IOMMU_BROKEN_GFX_WA
bool "Workaround broken graphics drivers (going away soon)"
- depends on DMAR && BROKEN && X86
+ depends on INTEL_IOMMU && BROKEN && X86
---help---
Current Graphics drivers tend to use physical address
for DMA and avoid using DMA APIs. Setting this config
@@ -90,23 +105,43 @@ config DMAR_BROKEN_GFX_WA
to use physical addresses for DMA, at least until this
option is removed in the 2.6.32 kernel.
-config DMAR_FLOPPY_WA
+config INTEL_IOMMU_FLOPPY_WA
def_bool y
- depends on DMAR && X86
+ depends on INTEL_IOMMU && X86
---help---
Floppy disk drivers are known to bypass DMA API calls
thereby failing to work when IOMMU is enabled. This
workaround will setup a 1:1 mapping for the first
16MiB to make floppy (an ISA device) work.
-config INTR_REMAP
+config IRQ_REMAP
bool "Support for Interrupt Remapping (EXPERIMENTAL)"
depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
+ select DMAR_TABLE
---help---
Supports Interrupt remapping for IO-APIC and MSI devices.
To use x2apic mode in the CPU's which support x2APIC enhancements or
to support platforms with CPU's having > 8 bit APIC ID, say Y.
+# OMAP IOMMU support
+config OMAP_IOMMU
+ bool "OMAP IOMMU Support"
+ depends on ARCH_OMAP
+ select IOMMU_API
+
+config OMAP_IOVMM
+ tristate "OMAP IO Virtual Memory Manager Support"
+ depends on OMAP_IOMMU
+
+config OMAP_IOMMU_DEBUG
+ tristate "Export OMAP IOMMU/IOVMM internals in DebugFS"
+ depends on OMAP_IOVMM && DEBUG_FS
+ help
+ Select this to see extensive information about
+ the internal state of OMAP IOMMU/IOVMM in debugfs.
+
+ Say N unless you know you need this.
+
config TEGRA_IOMMU_SMMU
bool "Tegra SMMU IOMMU Support"
depends on ARCH_TEGRA
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 74080aa1761f..7ad7a3bc1242 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,7 +1,12 @@
obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
-obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
-obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
+obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
+obj-$(CONFIG_DMAR_TABLE) += dmar.o
+obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o
+obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o
+obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
+obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o
+obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index cc79045bc527..cce1f03b8895 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -17,6 +17,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/ratelimit.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitmap.h>
@@ -28,6 +29,8 @@
#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
#include <asm/msidef.h>
#include <asm/proto.h>
#include <asm/iommu.h>
@@ -41,6 +44,24 @@
#define LOOP_TIMEOUT 100000
+/*
+ * This bitmap is used to advertise the page sizes our hardware supports
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * Traditionally the IOMMU core just handed us the mappings directly,
+ * after making sure the size is an order of a 4KiB page and that the
+ * mapping has natural alignment.
+ *
+ * To retain this behavior, we currently advertise that we support
+ * all page sizes that are an order of 4KiB.
+ *
+ * If at some point we'd like to utilize the IOMMU core's new behavior,
+ * we could change this to advertise the real page sizes we support.
+ */
+#define AMD_IOMMU_PGSIZES (~0xFFFUL)
+
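
The comment above describes the contract around the page-size bitmap: the IOMMU core splits physically contiguous regions into chunks whose sizes the driver advertises. As a rough illustration only (not code from this patch; the helper name and standalone form are assumed), a core layer could pick a chunk size like this:

/*
 * Hypothetical helper, for illustration only: pick the largest page size
 * advertised in pgsize_bitmap that fits the remaining length and keeps
 * iova/paddr naturally aligned.  With AMD_IOMMU_PGSIZES (~0xFFFUL) every
 * power of two >= 4KiB is advertised, so this degenerates to "largest
 * power of two that fits", preserving the old behavior.
 */
static size_t pick_page_size(unsigned long pgsize_bitmap,
                             unsigned long iova, phys_addr_t paddr, size_t len)
{
        u64 align = iova | paddr;
        unsigned int bit;
        size_t best = 0;

        for (bit = 12; bit < BITS_PER_LONG; ++bit) {
                size_t pgsize = 1UL << bit;

                if (!(pgsize_bitmap & pgsize))
                        continue;       /* size not advertised by the driver */
                if (pgsize > len)
                        break;          /* larger than what is left to map */
                if (align & (pgsize - 1))
                        break;          /* would break natural alignment */
                best = pgsize;
        }

        return best;
}
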
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
/* A list of preallocated protection domains */
@@ -59,6 +80,9 @@ static struct protection_domain *pt_domain;
static struct iommu_ops amd_iommu_ops;
+static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
+int amd_iommu_max_glx_val = -1;
+
/*
* general struct to manage commands send to an IOMMU
*/
@@ -67,6 +91,7 @@ struct iommu_cmd {
};
static void update_domain(struct protection_domain *domain);
+static int __init alloc_passthrough_domain(void);
/****************************************************************************
*
@@ -147,6 +172,33 @@ static struct iommu_dev_data *get_dev_data(struct device *dev)
return dev->archdata.iommu;
}
+static bool pci_iommuv2_capable(struct pci_dev *pdev)
+{
+ static const int caps[] = {
+ PCI_EXT_CAP_ID_ATS,
+ PCI_EXT_CAP_ID_PRI,
+ PCI_EXT_CAP_ID_PASID,
+ };
+ int i, pos;
+
+ for (i = 0; i < 3; ++i) {
+ pos = pci_find_ext_capability(pdev, caps[i]);
+ if (pos == 0)
+ return false;
+ }
+
+ return true;
+}
+
+static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
+{
+ struct iommu_dev_data *dev_data;
+
+ dev_data = get_dev_data(&pdev->dev);
+
+ return dev_data->errata & (1 << erratum) ? true : false;
+}
+
/*
* In this function the list of preallocated protection domains is traversed to
* find the domain for a specific device
@@ -204,6 +256,7 @@ static bool check_device(struct device *dev)
static int iommu_init_device(struct device *dev)
{
+ struct pci_dev *pdev = to_pci_dev(dev);
struct iommu_dev_data *dev_data;
u16 alias;
@@ -228,6 +281,13 @@ static int iommu_init_device(struct device *dev)
dev_data->alias_data = alias_data;
}
+ if (pci_iommuv2_capable(pdev)) {
+ struct amd_iommu *iommu;
+
+ iommu = amd_iommu_rlookup_table[dev_data->devid];
+ dev_data->iommu_v2 = iommu->is_iommu_v2;
+ }
+
dev->archdata.iommu = dev_data;
return 0;
@@ -317,6 +377,11 @@ DECLARE_STATS_COUNTER(domain_flush_single);
DECLARE_STATS_COUNTER(domain_flush_all);
DECLARE_STATS_COUNTER(alloced_io_mem);
DECLARE_STATS_COUNTER(total_map_requests);
+DECLARE_STATS_COUNTER(complete_ppr);
+DECLARE_STATS_COUNTER(invalidate_iotlb);
+DECLARE_STATS_COUNTER(invalidate_iotlb_all);
+DECLARE_STATS_COUNTER(pri_requests);
+
static struct dentry *stats_dir;
static struct dentry *de_fflush;
@@ -351,6 +416,10 @@ static void amd_iommu_stats_init(void)
amd_iommu_stats_add(&domain_flush_all);
amd_iommu_stats_add(&alloced_io_mem);
amd_iommu_stats_add(&total_map_requests);
+ amd_iommu_stats_add(&complete_ppr);
+ amd_iommu_stats_add(&invalidate_iotlb);
+ amd_iommu_stats_add(&invalidate_iotlb_all);
+ amd_iommu_stats_add(&pri_requests);
}
#endif
@@ -365,8 +434,8 @@ static void dump_dte_entry(u16 devid)
{
int i;
- for (i = 0; i < 8; ++i)
- pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
+ for (i = 0; i < 4; ++i)
+ pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
amd_iommu_dev_table[devid].data[i]);
}
@@ -461,12 +530,84 @@ static void iommu_poll_events(struct amd_iommu *iommu)
spin_unlock_irqrestore(&iommu->lock, flags);
}
+static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head)
+{
+ struct amd_iommu_fault fault;
+ volatile u64 *raw;
+ int i;
+
+ INC_STATS_COUNTER(pri_requests);
+
+ raw = (u64 *)(iommu->ppr_log + head);
+
+ /*
+ * Hardware bug: Interrupt may arrive before the entry is written to
+ * memory. If this happens we need to wait for the entry to arrive.
+ */
+ for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ if (PPR_REQ_TYPE(raw[0]) != 0)
+ break;
+ udelay(1);
+ }
+
+ if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
+ pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
+ return;
+ }
+
+ fault.address = raw[1];
+ fault.pasid = PPR_PASID(raw[0]);
+ fault.device_id = PPR_DEVID(raw[0]);
+ fault.tag = PPR_TAG(raw[0]);
+ fault.flags = PPR_FLAGS(raw[0]);
+
+ /*
+ * To detect the hardware bug we need to clear the entry
+ * back to zero.
+ */
+ raw[0] = raw[1] = 0;
+
+ atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
+}
+
+static void iommu_poll_ppr_log(struct amd_iommu *iommu)
+{
+ unsigned long flags;
+ u32 head, tail;
+
+ if (iommu->ppr_log == NULL)
+ return;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+
+ while (head != tail) {
+
+ /* Handle PPR entry */
+ iommu_handle_ppr_entry(iommu, head);
+
+ /* Update and refresh ring-buffer state*/
+ head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
+ writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+ }
+
+ /* enable ppr interrupts again */
+ writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
irqreturn_t amd_iommu_int_thread(int irq, void *data)
{
struct amd_iommu *iommu;
- for_each_iommu(iommu)
+ for_each_iommu(iommu) {
iommu_poll_events(iommu);
+ iommu_poll_ppr_log(iommu);
+ }
return IRQ_HANDLED;
}
@@ -595,6 +736,60 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
}
+static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
+ u64 address, bool size)
+{
+ memset(cmd, 0, sizeof(*cmd));
+
+ address &= ~(0xfffULL);
+
+ cmd->data[0] = pasid & PASID_MASK;
+ cmd->data[1] = domid;
+ cmd->data[2] = lower_32_bits(address);
+ cmd->data[3] = upper_32_bits(address);
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
+ if (size)
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+ CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+}
+
+static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
+ int qdep, u64 address, bool size)
+{
+ memset(cmd, 0, sizeof(*cmd));
+
+ address &= ~(0xfffULL);
+
+ cmd->data[0] = devid;
+ cmd->data[0] |= (pasid & 0xff) << 16;
+ cmd->data[0] |= (qdep & 0xff) << 24;
+ cmd->data[1] = devid;
+ cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16;
+ cmd->data[2] = lower_32_bits(address);
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
+ cmd->data[3] = upper_32_bits(address);
+ if (size)
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+ CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
+}
+
+static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
+ int status, int tag, bool gn)
+{
+ memset(cmd, 0, sizeof(*cmd));
+
+ cmd->data[0] = devid;
+ if (gn) {
+ cmd->data[1] = pasid & PASID_MASK;
+ cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK;
+ }
+ cmd->data[3] = tag & 0x1ff;
+ cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;
+
+ CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
+}
+
static void build_inv_all(struct iommu_cmd *cmd)
{
memset(cmd, 0, sizeof(*cmd));
@@ -1496,6 +1691,48 @@ static void free_pagetable(struct protection_domain *domain)
domain->pt_root = NULL;
}
+static void free_gcr3_tbl_level1(u64 *tbl)
+{
+ u64 *ptr;
+ int i;
+
+ for (i = 0; i < 512; ++i) {
+ if (!(tbl[i] & GCR3_VALID))
+ continue;
+
+ ptr = __va(tbl[i] & PAGE_MASK);
+
+ free_page((unsigned long)ptr);
+ }
+}
+
+static void free_gcr3_tbl_level2(u64 *tbl)
+{
+ u64 *ptr;
+ int i;
+
+ for (i = 0; i < 512; ++i) {
+ if (!(tbl[i] & GCR3_VALID))
+ continue;
+
+ ptr = __va(tbl[i] & PAGE_MASK);
+
+ free_gcr3_tbl_level1(ptr);
+ }
+}
+
+static void free_gcr3_table(struct protection_domain *domain)
+{
+ if (domain->glx == 2)
+ free_gcr3_tbl_level2(domain->gcr3_tbl);
+ else if (domain->glx == 1)
+ free_gcr3_tbl_level1(domain->gcr3_tbl);
+ else if (domain->glx != 0)
+ BUG();
+
+ free_page((unsigned long)domain->gcr3_tbl);
+}
+
/*
* Free a domain, only used if something went wrong in the
* allocation path and we need to free an already allocated page table
@@ -1582,20 +1819,52 @@ static bool dma_ops_domain(struct protection_domain *domain)
static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
{
- u64 pte_root = virt_to_phys(domain->pt_root);
- u32 flags = 0;
+ u64 pte_root = 0;
+ u64 flags = 0;
+
+ if (domain->mode != PAGE_MODE_NONE)
+ pte_root = virt_to_phys(domain->pt_root);
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+ flags = amd_iommu_dev_table[devid].data[1];
+
if (ats)
flags |= DTE_FLAG_IOTLB;
- amd_iommu_dev_table[devid].data[3] |= flags;
- amd_iommu_dev_table[devid].data[2] = domain->id;
- amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
- amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+ if (domain->flags & PD_IOMMUV2_MASK) {
+ u64 gcr3 = __pa(domain->gcr3_tbl);
+ u64 glx = domain->glx;
+ u64 tmp;
+
+ pte_root |= DTE_FLAG_GV;
+ pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
+
+ /* First mask out possible old values for GCR3 table */
+ tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
+ flags &= ~tmp;
+
+ tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
+ flags &= ~tmp;
+
+ /* Encode GCR3 table into DTE */
+ tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
+ pte_root |= tmp;
+
+ tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
+ flags |= tmp;
+
+ tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
+ flags |= tmp;
+ }
+
+ flags &= ~(0xffffUL);
+ flags |= domain->id;
+
+ amd_iommu_dev_table[devid].data[1] = flags;
+ amd_iommu_dev_table[devid].data[0] = pte_root;
}
static void clear_dte_entry(u16 devid)
@@ -1603,7 +1872,6 @@ static void clear_dte_entry(u16 devid)
/* remove entry from the device table seen by the hardware */
amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
amd_iommu_dev_table[devid].data[1] = 0;
- amd_iommu_dev_table[devid].data[2] = 0;
amd_iommu_apply_erratum_63(devid);
}
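
For reference, the GCR3 pointer programmed by set_dte_entry() above is scattered across three DTE fields (see the DTE_GCR3_* definitions added to amd_iommu_types.h later in this diff). A hypothetical helper that inverts the encoding makes the layout explicit; it is an illustration, not part of the patch:

/*
 * Hypothetical helper, for illustration only: reassemble the GCR3 base
 * address from the two 64-bit DTE words written by set_dte_entry().
 * Field layout, per the DTE_GCR3_* definitions in amd_iommu_types.h:
 *   VAL_A = gcr3[14:12] stored in data[0] bits 60:58
 *   VAL_B = gcr3[30:15] stored in data[1] bits 31:16
 *   VAL_C = gcr3[50:31] stored in data[1] bits 62:43
 */
static u64 dte_to_gcr3(u64 data0, u64 data1)
{
        u64 gcr3;

        gcr3  = ((data0 >> DTE_GCR3_SHIFT_A) & 0x00007ULL) << 12;
        gcr3 |= ((data1 >> DTE_GCR3_SHIFT_B) & 0x0ffffULL) << 15;
        gcr3 |= ((data1 >> DTE_GCR3_SHIFT_C) & 0xfffffULL) << 31;

        return gcr3;
}
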
@@ -1696,6 +1964,93 @@ out_unlock:
return ret;
}
+
+static void pdev_iommuv2_disable(struct pci_dev *pdev)
+{
+ pci_disable_ats(pdev);
+ pci_disable_pri(pdev);
+ pci_disable_pasid(pdev);
+}
+
+/* FIXME: Change generic reset-function to do the same */
+static int pri_reset_while_enabled(struct pci_dev *pdev)
+{
+ u16 control;
+ int pos;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+ if (!pos)
+ return -EINVAL;
+
+ pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+ control |= PCI_PRI_CTRL_RESET;
+ pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
+
+ return 0;
+}
+
+static int pdev_iommuv2_enable(struct pci_dev *pdev)
+{
+ bool reset_enable;
+ int reqs, ret;
+
+ /* FIXME: Hardcode number of outstanding requests for now */
+ reqs = 32;
+ if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
+ reqs = 1;
+ reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
+
+ /* Only allow access to user-accessible pages */
+ ret = pci_enable_pasid(pdev, 0);
+ if (ret)
+ goto out_err;
+
+ /* First reset the PRI state of the device */
+ ret = pci_reset_pri(pdev);
+ if (ret)
+ goto out_err;
+
+ /* Enable PRI */
+ ret = pci_enable_pri(pdev, reqs);
+ if (ret)
+ goto out_err;
+
+ if (reset_enable) {
+ ret = pri_reset_while_enabled(pdev);
+ if (ret)
+ goto out_err;
+ }
+
+ ret = pci_enable_ats(pdev, PAGE_SHIFT);
+ if (ret)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ pci_disable_pri(pdev);
+ pci_disable_pasid(pdev);
+
+ return ret;
+}
+
+/* FIXME: Move this to PCI code */
+#define PCI_PRI_TLP_OFF (1 << 2)
+
+bool pci_pri_tlp_required(struct pci_dev *pdev)
+{
+ u16 control;
+ int pos;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+ if (!pos)
+ return false;
+
+ pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+
+ return (control & PCI_PRI_TLP_OFF) ? true : false;
+}
+
/*
* If a device is not yet associated with a domain, this function
* assigns one and makes the device visible to the hardware
@@ -1710,7 +2065,18 @@ static int attach_device(struct device *dev,
dev_data = get_dev_data(dev);
- if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
+ if (domain->flags & PD_IOMMUV2_MASK) {
+ if (!dev_data->iommu_v2 || !dev_data->passthrough)
+ return -EINVAL;
+
+ if (pdev_iommuv2_enable(pdev) != 0)
+ return -EINVAL;
+
+ dev_data->ats.enabled = true;
+ dev_data->ats.qdep = pci_ats_queue_depth(pdev);
+ dev_data->pri_tlp = pci_pri_tlp_required(pdev);
+ } else if (amd_iommu_iotlb_sup &&
+ pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
dev_data->ats.enabled = true;
dev_data->ats.qdep = pci_ats_queue_depth(pdev);
}
@@ -1760,7 +2126,7 @@ static void __detach_device(struct iommu_dev_data *dev_data)
* passthrough domain if it is detached from any other domain.
* Make sure we can deassign from the pt_domain itself.
*/
- if (iommu_pass_through &&
+ if (dev_data->passthrough &&
(dev_data->domain == NULL && domain != pt_domain))
__attach_device(dev_data, pt_domain);
}
@@ -1770,20 +2136,24 @@ static void __detach_device(struct iommu_dev_data *dev_data)
*/
static void detach_device(struct device *dev)
{
+ struct protection_domain *domain;
struct iommu_dev_data *dev_data;
unsigned long flags;
dev_data = get_dev_data(dev);
+ domain = dev_data->domain;
/* lock device table */
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
__detach_device(dev_data);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
- if (dev_data->ats.enabled) {
+ if (domain->flags & PD_IOMMUV2_MASK)
+ pdev_iommuv2_disable(to_pci_dev(dev));
+ else if (dev_data->ats.enabled)
pci_disable_ats(to_pci_dev(dev));
- dev_data->ats.enabled = false;
- }
+
+ dev_data->ats.enabled = false;
}
/*
@@ -1818,18 +2188,20 @@ static struct protection_domain *domain_for_device(struct device *dev)
static int device_change_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
- struct device *dev = data;
- u16 devid;
- struct protection_domain *domain;
struct dma_ops_domain *dma_domain;
+ struct protection_domain *domain;
+ struct iommu_dev_data *dev_data;
+ struct device *dev = data;
struct amd_iommu *iommu;
unsigned long flags;
+ u16 devid;
if (!check_device(dev))
return 0;
- devid = get_device_id(dev);
- iommu = amd_iommu_rlookup_table[devid];
+ devid = get_device_id(dev);
+ iommu = amd_iommu_rlookup_table[devid];
+ dev_data = get_dev_data(dev);
switch (action) {
case BUS_NOTIFY_UNBOUND_DRIVER:
@@ -1838,7 +2210,7 @@ static int device_change_notifier(struct notifier_block *nb,
if (!domain)
goto out;
- if (iommu_pass_through)
+ if (dev_data->passthrough)
break;
detach_device(dev);
break;
@@ -2434,8 +2806,9 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
*/
static void prealloc_protection_domains(void)
{
- struct pci_dev *dev = NULL;
+ struct iommu_dev_data *dev_data;
struct dma_ops_domain *dma_dom;
+ struct pci_dev *dev = NULL;
u16 devid;
for_each_pci_dev(dev) {
@@ -2444,6 +2817,16 @@ static void prealloc_protection_domains(void)
if (!check_device(&dev->dev))
continue;
+ dev_data = get_dev_data(&dev->dev);
+ if (!amd_iommu_force_isolation && dev_data->iommu_v2) {
+ /* Make sure passthrough domain is allocated */
+ alloc_passthrough_domain();
+ dev_data->passthrough = true;
+ attach_device(&dev->dev, pt_domain);
+ pr_info("AMD-Vi: Using passthough domain for device %s\n",
+ dev_name(&dev->dev));
+ }
+
/* Is there already any domain for it? */
if (domain_for_device(&dev->dev))
continue;
@@ -2474,6 +2857,7 @@ static struct dma_map_ops amd_iommu_dma_ops = {
static unsigned device_dma_ops_init(void)
{
+ struct iommu_dev_data *dev_data;
struct pci_dev *pdev = NULL;
unsigned unhandled = 0;
@@ -2483,7 +2867,12 @@ static unsigned device_dma_ops_init(void)
continue;
}
- pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
+ dev_data = get_dev_data(&pdev->dev);
+
+ if (!dev_data->passthrough)
+ pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
+ else
+ pdev->dev.archdata.dma_ops = &nommu_dma_ops;
}
return unhandled;
@@ -2495,7 +2884,7 @@ static unsigned device_dma_ops_init(void)
void __init amd_iommu_init_api(void)
{
- register_iommu(&amd_iommu_ops);
+ bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
}
int __init amd_iommu_init_dma_ops(void)
@@ -2610,6 +2999,20 @@ out_err:
return NULL;
}
+static int __init alloc_passthrough_domain(void)
+{
+ if (pt_domain != NULL)
+ return 0;
+
+ /* allocate passthrough domain */
+ pt_domain = protection_domain_alloc();
+ if (!pt_domain)
+ return -ENOMEM;
+
+ pt_domain->mode = PAGE_MODE_NONE;
+
+ return 0;
+}
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
@@ -2623,6 +3026,8 @@ static int amd_iommu_domain_init(struct iommu_domain *dom)
if (!domain->pt_root)
goto out_free;
+ domain->iommu_domain = dom;
+
dom->priv = domain;
return 0;
@@ -2645,7 +3050,11 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
BUG_ON(domain->dev_cnt != 0);
- free_pagetable(domain);
+ if (domain->mode != PAGE_MODE_NONE)
+ free_pagetable(domain);
+
+ if (domain->flags & PD_IOMMUV2_MASK)
+ free_gcr3_table(domain);
protection_domain_free(domain);
@@ -2702,13 +3111,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
}
static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
- phys_addr_t paddr, int gfp_order, int iommu_prot)
+ phys_addr_t paddr, size_t page_size, int iommu_prot)
{
- unsigned long page_size = 0x1000UL << gfp_order;
struct protection_domain *domain = dom->priv;
int prot = 0;
int ret;
+ if (domain->mode == PAGE_MODE_NONE)
+ return -EINVAL;
+
if (iommu_prot & IOMMU_READ)
prot |= IOMMU_PROT_IR;
if (iommu_prot & IOMMU_WRITE)
@@ -2721,13 +3132,14 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
return ret;
}
-static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
- int gfp_order)
+static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
+ size_t page_size)
{
struct protection_domain *domain = dom->priv;
- unsigned long page_size, unmap_size;
+ size_t unmap_size;
- page_size = 0x1000UL << gfp_order;
+ if (domain->mode == PAGE_MODE_NONE)
+ return -EINVAL;
mutex_lock(&domain->api_lock);
unmap_size = iommu_unmap_page(domain, iova, page_size);
@@ -2735,7 +3147,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
domain_flush_tlb_pde(domain);
- return get_order(unmap_size);
+ return unmap_size;
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -2746,6 +3158,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
phys_addr_t paddr;
u64 *pte, __pte;
+ if (domain->mode == PAGE_MODE_NONE)
+ return iova;
+
pte = fetch_pte(domain, iova);
if (!pte || !IOMMU_PTE_PRESENT(*pte))
@@ -2773,6 +3188,26 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
return 0;
}
+static int amd_iommu_device_group(struct device *dev, unsigned int *groupid)
+{
+ struct iommu_dev_data *dev_data = dev->archdata.iommu;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u16 devid;
+
+ if (!dev_data)
+ return -ENODEV;
+
+ if (pdev->is_virtfn || !iommu_group_mf)
+ devid = dev_data->devid;
+ else
+ devid = calc_devid(pdev->bus->number,
+ PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
+
+ *groupid = amd_iommu_alias_table[devid];
+
+ return 0;
+}
+
static struct iommu_ops amd_iommu_ops = {
.domain_init = amd_iommu_domain_init,
.domain_destroy = amd_iommu_domain_destroy,
@@ -2782,6 +3217,8 @@ static struct iommu_ops amd_iommu_ops = {
.unmap = amd_iommu_unmap,
.iova_to_phys = amd_iommu_iova_to_phys,
.domain_has_cap = amd_iommu_domain_has_cap,
+ .device_group = amd_iommu_device_group,
+ .pgsize_bitmap = AMD_IOMMU_PGSIZES,
};
/*****************************************************************************
@@ -2796,21 +3233,23 @@ static struct iommu_ops amd_iommu_ops = {
int __init amd_iommu_init_passthrough(void)
{
- struct amd_iommu *iommu;
+ struct iommu_dev_data *dev_data;
struct pci_dev *dev = NULL;
+ struct amd_iommu *iommu;
u16 devid;
+ int ret;
- /* allocate passthrough domain */
- pt_domain = protection_domain_alloc();
- if (!pt_domain)
- return -ENOMEM;
-
- pt_domain->mode |= PAGE_MODE_NONE;
+ ret = alloc_passthrough_domain();
+ if (ret)
+ return ret;
for_each_pci_dev(dev) {
if (!check_device(&dev->dev))
continue;
+ dev_data = get_dev_data(&dev->dev);
+ dev_data->passthrough = true;
+
devid = get_device_id(&dev->dev);
iommu = amd_iommu_rlookup_table[devid];
@@ -2820,7 +3259,375 @@ int __init amd_iommu_init_passthrough(void)
attach_device(&dev->dev, pt_domain);
}
+ amd_iommu_stats_init();
+
pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
return 0;
}
+
+/* IOMMUv2 specific functions */
+int amd_iommu_register_ppr_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&ppr_notifier, nb);
+}
+EXPORT_SYMBOL(amd_iommu_register_ppr_notifier);
+
+int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&ppr_notifier, nb);
+}
+EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
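
A consumer of this notifier chain receives the struct amd_iommu_fault that iommu_handle_ppr_entry() decodes from the PPR log. The sketch below is illustrative only (the handler and notifier_block names are invented); the real in-tree user is the amd_iommu_v2 driver added by this patch:

/* Illustrative sketch only - handler and hook placement are assumed names. */
static int example_ppr_handler(struct notifier_block *nb,
                               unsigned long action, void *data)
{
        struct amd_iommu_fault *fault = data;

        /* React to the fault, e.g. queue work to resolve it, then answer
         * the request later via amd_iommu_complete_ppr(). */
        pr_debug("PPR fault: devid=%#x pasid=%#x addr=%#llx flags=%#x\n",
                 fault->device_id, fault->pasid,
                 (unsigned long long)fault->address, fault->flags);

        return NOTIFY_OK;
}

static struct notifier_block example_nb = {
        .notifier_call = example_ppr_handler,
};

/* In the consumer's init/exit paths:
 *     amd_iommu_register_ppr_notifier(&example_nb);
 *     amd_iommu_unregister_ppr_notifier(&example_nb);
 */
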
+
+void amd_iommu_domain_direct_map(struct iommu_domain *dom)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ /* Update data structure */
+ domain->mode = PAGE_MODE_NONE;
+ domain->updated = true;
+
+ /* Make changes visible to IOMMUs */
+ update_domain(domain);
+
+ /* Page-table is not visible to IOMMU anymore, so free it */
+ free_pagetable(domain);
+
+ spin_unlock_irqrestore(&domain->lock, flags);
+}
+EXPORT_SYMBOL(amd_iommu_domain_direct_map);
+
+int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+ int levels, ret;
+
+ if (pasids <= 0 || pasids > (PASID_MASK + 1))
+ return -EINVAL;
+
+ /* Number of GCR3 table levels required */
+ for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
+ levels += 1;
+
+ if (levels > amd_iommu_max_glx_val)
+ return -EINVAL;
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ /*
+ * Save us all sanity checks whether devices already in the
+ * domain support IOMMUv2. Just force that the domain has no
+ * devices attached when it is switched into IOMMUv2 mode.
+ */
+ ret = -EBUSY;
+ if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK)
+ goto out;
+
+ ret = -ENOMEM;
+ domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
+ if (domain->gcr3_tbl == NULL)
+ goto out;
+
+ domain->glx = levels;
+ domain->flags |= PD_IOMMUV2_MASK;
+ domain->updated = true;
+
+ update_domain(domain);
+
+ ret = 0;
+
+out:
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
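
The loop above sizes the GCR3 table: each level resolves 9 bits of the PASID, with the leaf table covering the final 9 bits. A standalone restatement with a few worked values, purely for illustration:

/* Hypothetical restatement of the level computation above, for illustration. */
static int gcr3_levels_needed(int pasids)
{
        int levels = 0;

        /* Each extra GCR3 table level resolves another 9 PASID bits
         * beyond the 9 bits covered by the leaf table. */
        for (; (pasids - 1) & ~0x1ff; pasids >>= 9)
                levels += 1;

        return levels;  /* e.g. 512 -> 0, 65536 -> 1, 1 << 20 -> 2 */
}
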
+
+static int __flush_pasid(struct protection_domain *domain, int pasid,
+ u64 address, bool size)
+{
+ struct iommu_dev_data *dev_data;
+ struct iommu_cmd cmd;
+ int i, ret;
+
+ if (!(domain->flags & PD_IOMMUV2_MASK))
+ return -EINVAL;
+
+ build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size);
+
+ /*
+ * IOMMU TLB needs to be flushed before Device TLB to
+ * prevent device TLB refill from IOMMU TLB
+ */
+ for (i = 0; i < amd_iommus_present; ++i) {
+ if (domain->dev_iommu[i] == 0)
+ continue;
+
+ ret = iommu_queue_command(amd_iommus[i], &cmd);
+ if (ret != 0)
+ goto out;
+ }
+
+ /* Wait until IOMMU TLB flushes are complete */
+ domain_flush_complete(domain);
+
+ /* Now flush device TLBs */
+ list_for_each_entry(dev_data, &domain->dev_list, list) {
+ struct amd_iommu *iommu;
+ int qdep;
+
+ BUG_ON(!dev_data->ats.enabled);
+
+ qdep = dev_data->ats.qdep;
+ iommu = amd_iommu_rlookup_table[dev_data->devid];
+
+ build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid,
+ qdep, address, size);
+
+ ret = iommu_queue_command(iommu, &cmd);
+ if (ret != 0)
+ goto out;
+ }
+
+ /* Wait until all device TLBs are flushed */
+ domain_flush_complete(domain);
+
+ ret = 0;
+
+out:
+
+ return ret;
+}
+
+static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
+ u64 address)
+{
+ INC_STATS_COUNTER(invalidate_iotlb);
+
+ return __flush_pasid(domain, pasid, address, false);
+}
+
+int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+ u64 address)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ ret = __amd_iommu_flush_page(domain, pasid, address);
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_flush_page);
+
+static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
+{
+ INC_STATS_COUNTER(invalidate_iotlb_all);
+
+ return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+ true);
+}
+
+int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ ret = __amd_iommu_flush_tlb(domain, pasid);
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_flush_tlb);
+
+static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
+{
+ int index;
+ u64 *pte;
+
+ while (true) {
+
+ index = (pasid >> (9 * level)) & 0x1ff;
+ pte = &root[index];
+
+ if (level == 0)
+ break;
+
+ if (!(*pte & GCR3_VALID)) {
+ if (!alloc)
+ return NULL;
+
+ root = (void *)get_zeroed_page(GFP_ATOMIC);
+ if (root == NULL)
+ return NULL;
+
+ *pte = __pa(root) | GCR3_VALID;
+ }
+
+ root = __va(*pte & PAGE_MASK);
+
+ level -= 1;
+ }
+
+ return pte;
+}
+
+static int __set_gcr3(struct protection_domain *domain, int pasid,
+ unsigned long cr3)
+{
+ u64 *pte;
+
+ if (domain->mode != PAGE_MODE_NONE)
+ return -EINVAL;
+
+ pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
+ if (pte == NULL)
+ return -ENOMEM;
+
+ *pte = (cr3 & PAGE_MASK) | GCR3_VALID;
+
+ return __amd_iommu_flush_tlb(domain, pasid);
+}
+
+static int __clear_gcr3(struct protection_domain *domain, int pasid)
+{
+ u64 *pte;
+
+ if (domain->mode != PAGE_MODE_NONE)
+ return -EINVAL;
+
+ pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
+ if (pte == NULL)
+ return 0;
+
+ *pte = 0;
+
+ return __amd_iommu_flush_tlb(domain, pasid);
+}
+
+int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+ unsigned long cr3)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ ret = __set_gcr3(domain, pasid, cr3);
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
+
+int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ ret = __clear_gcr3(domain, pasid);
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
+
+int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+ int status, int tag)
+{
+ struct iommu_dev_data *dev_data;
+ struct amd_iommu *iommu;
+ struct iommu_cmd cmd;
+
+ INC_STATS_COUNTER(complete_ppr);
+
+ dev_data = get_dev_data(&pdev->dev);
+ iommu = amd_iommu_rlookup_table[dev_data->devid];
+
+ build_complete_ppr(&cmd, dev_data->devid, pasid, status,
+ tag, dev_data->pri_tlp);
+
+ return iommu_queue_command(iommu, &cmd);
+}
+EXPORT_SYMBOL(amd_iommu_complete_ppr);
+
+struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
+{
+ struct protection_domain *domain;
+
+ domain = get_domain(&pdev->dev);
+ if (IS_ERR(domain))
+ return NULL;
+
+ /* Only return IOMMUv2 domains */
+ if (!(domain->flags & PD_IOMMUV2_MASK))
+ return NULL;
+
+ return domain->iommu_domain;
+}
+EXPORT_SYMBOL(amd_iommu_get_v2_domain);
+
+void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
+{
+ struct iommu_dev_data *dev_data;
+
+ if (!amd_iommu_v2_supported())
+ return;
+
+ dev_data = get_dev_data(&pdev->dev);
+ dev_data->errata |= (1 << erratum);
+}
+EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
+
+int amd_iommu_device_info(struct pci_dev *pdev,
+ struct amd_iommu_device_info *info)
+{
+ int max_pasids;
+ int pos;
+
+ if (pdev == NULL || info == NULL)
+ return -EINVAL;
+
+ if (!amd_iommu_v2_supported())
+ return -EINVAL;
+
+ memset(info, 0, sizeof(*info));
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS);
+ if (pos)
+ info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+ if (pos)
+ info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
+ if (pos) {
+ int features;
+
+ max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
+ max_pasids = min(max_pasids, (1 << 20));
+
+ info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+ info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
+
+ features = pci_pasid_features(pdev);
+ if (features & PCI_PASID_CAP_EXEC)
+ info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
+ if (features & PCI_PASID_CAP_PRIV)
+ info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(amd_iommu_device_info);
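
A device driver wanting to use IOMMUv2 features would typically query this interface before enabling anything. The caller below is a hypothetical sketch (struct amd_iommu_device_info and the AMD_IOMMU_DEVICE_FLAG_* constants come from the linux/amd-iommu.h header, which is outside this diff):

/* Illustrative caller, not part of the patch. */
static bool example_can_use_iommuv2(struct pci_dev *pdev)
{
        struct amd_iommu_device_info info;

        if (amd_iommu_device_info(pdev, &info))
                return false;

        /* Require ATS, PRI and PASID support plus at least one usable PASID */
        return (info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) &&
               (info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) &&
               (info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) &&
               info.max_pasids > 0;
}
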
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 82d2410f4205..bdea288dc185 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -25,6 +25,7 @@
#include <linux/interrupt.h>
#include <linux/msi.h>
#include <linux/amd-iommu.h>
+#include <linux/export.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/gart.h>
@@ -141,6 +142,12 @@ int amd_iommus_present;
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;
+u32 amd_iommu_max_pasids __read_mostly = ~0;
+
+bool amd_iommu_v2_present __read_mostly;
+
+bool amd_iommu_force_isolation __read_mostly;
+
/*
* The ACPI table parsing functions set this variable on an error
*/
@@ -299,6 +306,16 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}
+static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
+{
+ u32 ctrl;
+
+ ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+ ctrl &= ~CTRL_INV_TO_MASK;
+ ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
+ writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
+}
+
/* Function to enable the hardware */
static void iommu_enable(struct amd_iommu *iommu)
{
@@ -581,21 +598,69 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
}
+/* allocates the memory where the IOMMU will log PPR requests to */
+static u8 * __init alloc_ppr_log(struct amd_iommu *iommu)
+{
+ iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(PPR_LOG_SIZE));
+
+ if (iommu->ppr_log == NULL)
+ return NULL;
+
+ return iommu->ppr_log;
+}
+
+static void iommu_enable_ppr_log(struct amd_iommu *iommu)
+{
+ u64 entry;
+
+ if (iommu->ppr_log == NULL)
+ return;
+
+ entry = (u64)virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
+
+ memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
+ &entry, sizeof(entry));
+
+ /* set head and tail to zero manually */
+ writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+ writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+
+ iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
+ iommu_feature_enable(iommu, CONTROL_PPR_EN);
+}
+
+static void __init free_ppr_log(struct amd_iommu *iommu)
+{
+ if (iommu->ppr_log == NULL)
+ return;
+
+ free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
+}
+
+static void iommu_enable_gt(struct amd_iommu *iommu)
+{
+ if (!iommu_feature(iommu, FEATURE_GT))
+ return;
+
+ iommu_feature_enable(iommu, CONTROL_GT_EN);
+}
+
/* sets a specific bit in the device table entry. */
static void set_dev_entry_bit(u16 devid, u8 bit)
{
- int i = (bit >> 5) & 0x07;
- int _bit = bit & 0x1f;
+ int i = (bit >> 6) & 0x03;
+ int _bit = bit & 0x3f;
- amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
+ amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
}
static int get_dev_entry_bit(u16 devid, u8 bit)
{
- int i = (bit >> 5) & 0x07;
- int _bit = bit & 0x1f;
+ int i = (bit >> 6) & 0x03;
+ int _bit = bit & 0x3f;
- return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
+ return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
}
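
The device-table entry switches from eight 32-bit words to four 64-bit words, and the reworked helpers above still address the same absolute bit, so existing DEV_ENTRY_* bit numbers keep their meaning. A small hypothetical check spells out the equivalence:

/* Illustration only: both indexing schemes address the same absolute bit
 * inside the 256-bit DTE. */
static void example_dte_bit_equivalence(u8 bit)
{
        unsigned int old_word = (bit >> 5) & 0x07, old_off = bit & 0x1f;
        unsigned int new_word = (bit >> 6) & 0x03, new_off = bit & 0x3f;

        /* absolute bit position is identical in both layouts */
        BUG_ON(old_word * 32 + old_off != new_word * 64 + new_off);
}
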
@@ -699,6 +764,32 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
iommu->features = ((u64)high << 32) | low;
+ if (iommu_feature(iommu, FEATURE_GT)) {
+ int glxval;
+ u32 pasids;
+ u64 shift;
+
+ shift = iommu->features & FEATURE_PASID_MASK;
+ shift >>= FEATURE_PASID_SHIFT;
+ pasids = (1 << shift);
+
+ amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids);
+
+ glxval = iommu->features & FEATURE_GLXVAL_MASK;
+ glxval >>= FEATURE_GLXVAL_SHIFT;
+
+ if (amd_iommu_max_glx_val == -1)
+ amd_iommu_max_glx_val = glxval;
+ else
+ amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
+ }
+
+ if (iommu_feature(iommu, FEATURE_GT) &&
+ iommu_feature(iommu, FEATURE_PPR)) {
+ iommu->is_iommu_v2 = true;
+ amd_iommu_v2_present = true;
+ }
+
if (!is_rd890_iommu(iommu->dev))
return;
@@ -901,6 +992,7 @@ static void __init free_iommu_one(struct amd_iommu *iommu)
{
free_command_buffer(iommu);
free_event_buffer(iommu);
+ free_ppr_log(iommu);
iommu_unmap_mmio_space(iommu);
}
@@ -964,6 +1056,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
init_iommu_from_acpi(iommu, h);
init_iommu_devices(iommu);
+ if (iommu_feature(iommu, FEATURE_PPR)) {
+ iommu->ppr_log = alloc_ppr_log(iommu);
+ if (!iommu->ppr_log)
+ return -ENOMEM;
+ }
+
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
amd_iommu_np_cache = true;
@@ -1050,6 +1148,9 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
iommu->int_enabled = true;
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+ if (iommu->ppr_log != NULL)
+ iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
+
return 0;
}
@@ -1209,6 +1310,9 @@ static void iommu_init_flags(struct amd_iommu *iommu)
* make IOMMU memory accesses cache coherent
*/
iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+
+ /* Set IOTLB invalidation timeout to 1s */
+ iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
}
static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
@@ -1274,6 +1378,8 @@ static void enable_iommus(void)
iommu_set_device_table(iommu);
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
+ iommu_enable_ppr_log(iommu);
+ iommu_enable_gt(iommu);
iommu_set_exclusion_range(iommu);
iommu_init_msi(iommu);
iommu_enable(iommu);
@@ -1303,13 +1409,6 @@ static void amd_iommu_resume(void)
/* re-load the hardware */
enable_iommus();
-
- /*
- * we have to flush after the IOMMUs are enabled because a
- * disabled IOMMU will never execute the commands we send
- */
- for_each_iommu(iommu)
- iommu_flush_all_caches(iommu);
}
static int amd_iommu_suspend(void)
@@ -1560,6 +1659,8 @@ static int __init parse_amd_iommu_options(char *str)
amd_iommu_unmap_flush = true;
if (strncmp(str, "off", 3) == 0)
amd_iommu_disabled = true;
+ if (strncmp(str, "force_isolation", 15) == 0)
+ amd_iommu_force_isolation = true;
}
return 1;
@@ -1572,3 +1673,9 @@ IOMMU_INIT_FINISH(amd_iommu_detect,
gart_iommu_hole_init,
0,
0);
+
+bool amd_iommu_v2_supported(void)
+{
+ return amd_iommu_v2_present;
+}
+EXPORT_SYMBOL(amd_iommu_v2_supported);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 7ffaa64410b0..1a7f41c6cc66 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -31,6 +31,30 @@ extern int amd_iommu_init_devices(void);
extern void amd_iommu_uninit_devices(void);
extern void amd_iommu_init_notifier(void);
extern void amd_iommu_init_api(void);
+
+/* IOMMUv2 specific functions */
+struct iommu_domain;
+
+extern bool amd_iommu_v2_supported(void);
+extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
+extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
+extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
+extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
+extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+ u64 address);
+extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
+extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+ unsigned long cr3);
+extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
+extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
+
+#define PPR_SUCCESS 0x0
+#define PPR_INVALID 0x1
+#define PPR_FAILURE 0xf
+
+extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+ int status, int tag);
+
#ifndef CONFIG_AMD_IOMMU_STATS
static inline void amd_iommu_stats_init(void) { }
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 5b9c5075e81a..2452f3b71736 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -69,11 +69,14 @@
#define MMIO_EXCL_BASE_OFFSET 0x0020
#define MMIO_EXCL_LIMIT_OFFSET 0x0028
#define MMIO_EXT_FEATURES 0x0030
+#define MMIO_PPR_LOG_OFFSET 0x0038
#define MMIO_CMD_HEAD_OFFSET 0x2000
#define MMIO_CMD_TAIL_OFFSET 0x2008
#define MMIO_EVT_HEAD_OFFSET 0x2010
#define MMIO_EVT_TAIL_OFFSET 0x2018
#define MMIO_STATUS_OFFSET 0x2020
+#define MMIO_PPR_HEAD_OFFSET 0x2030
+#define MMIO_PPR_TAIL_OFFSET 0x2038
/* Extended Feature Bits */
@@ -87,8 +90,17 @@
#define FEATURE_HE (1ULL<<8)
#define FEATURE_PC (1ULL<<9)
+#define FEATURE_PASID_SHIFT 32
+#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
+
+#define FEATURE_GLXVAL_SHIFT 14
+#define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT)
+
+#define PASID_MASK 0x000fffff
+
/* MMIO status bits */
-#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
+#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
+#define MMIO_STATUS_PPR_INT_MASK (1 << 6)
/* event logging constants */
#define EVENT_ENTRY_SIZE 0x10
@@ -115,6 +127,7 @@
#define CONTROL_EVT_LOG_EN 0x02ULL
#define CONTROL_EVT_INT_EN 0x03ULL
#define CONTROL_COMWAIT_EN 0x04ULL
+#define CONTROL_INV_TIMEOUT 0x05ULL
#define CONTROL_PASSPW_EN 0x08ULL
#define CONTROL_RESPASSPW_EN 0x09ULL
#define CONTROL_COHERENT_EN 0x0aULL
@@ -122,18 +135,34 @@
#define CONTROL_CMDBUF_EN 0x0cULL
#define CONTROL_PPFLOG_EN 0x0dULL
#define CONTROL_PPFINT_EN 0x0eULL
+#define CONTROL_PPR_EN 0x0fULL
+#define CONTROL_GT_EN 0x10ULL
+
+#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
+#define CTRL_INV_TO_NONE 0
+#define CTRL_INV_TO_1MS 1
+#define CTRL_INV_TO_10MS 2
+#define CTRL_INV_TO_100MS 3
+#define CTRL_INV_TO_1S 4
+#define CTRL_INV_TO_10S 5
+#define CTRL_INV_TO_100S 6
/* command specific defines */
#define CMD_COMPL_WAIT 0x01
#define CMD_INV_DEV_ENTRY 0x02
#define CMD_INV_IOMMU_PAGES 0x03
#define CMD_INV_IOTLB_PAGES 0x04
+#define CMD_COMPLETE_PPR 0x07
#define CMD_INV_ALL 0x08
#define CMD_COMPL_WAIT_STORE_MASK 0x01
#define CMD_COMPL_WAIT_INT_MASK 0x02
#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01
#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
+#define CMD_INV_IOMMU_PAGES_GN_MASK 0x04
+
+#define PPR_STATUS_MASK 0xf
+#define PPR_STATUS_SHIFT 12
#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL
@@ -165,6 +194,23 @@
#define EVT_BUFFER_SIZE 8192 /* 512 entries */
#define EVT_LEN_MASK (0x9ULL << 56)
+/* Constants for PPR Log handling */
+#define PPR_LOG_ENTRIES 512
+#define PPR_LOG_SIZE_SHIFT 56
+#define PPR_LOG_SIZE_512 (0x9ULL << PPR_LOG_SIZE_SHIFT)
+#define PPR_ENTRY_SIZE 16
+#define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES)
+
+#define PPR_REQ_TYPE(x) (((x) >> 60) & 0xfULL)
+#define PPR_FLAGS(x) (((x) >> 48) & 0xfffULL)
+#define PPR_DEVID(x) ((x) & 0xffffULL)
+#define PPR_TAG(x) (((x) >> 32) & 0x3ffULL)
+#define PPR_PASID1(x) (((x) >> 16) & 0xffffULL)
+#define PPR_PASID2(x) (((x) >> 42) & 0xfULL)
+#define PPR_PASID(x) ((PPR_PASID2(x) << 16) | PPR_PASID1(x))
+
+#define PPR_REQ_FAULT 0x01
+
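
The PASID in a PPR log entry is split across two fields of raw[0] and reassembled by PPR_PASID(). A hypothetical self-check, assuming the macros above are in scope, shows a 20-bit value round-tripping through the encoding:

/* Illustration only: a 20-bit PASID round-trips through the split encoding
 * used by the hardware (low 16 bits at raw[0] bits 31:16, high 4 bits at
 * bits 45:42). */
static void example_ppr_pasid_roundtrip(void)
{
        u64 pasid = 0xABCDE;
        u64 raw0  = ((pasid & 0xffffULL) << 16) |
                    (((pasid >> 16) & 0xfULL) << 42);

        BUG_ON(PPR_PASID(raw0) != pasid);
}
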
#define PAGE_MODE_NONE 0x00
#define PAGE_MODE_1_LEVEL 0x01
#define PAGE_MODE_2_LEVEL 0x02
@@ -230,7 +276,24 @@
#define IOMMU_PTE_IR (1ULL << 61)
#define IOMMU_PTE_IW (1ULL << 62)
-#define DTE_FLAG_IOTLB 0x01
+#define DTE_FLAG_IOTLB (0x01UL << 32)
+#define DTE_FLAG_GV (0x01ULL << 55)
+#define DTE_GLX_SHIFT (56)
+#define DTE_GLX_MASK (3)
+
+#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL)
+#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL)
+#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0xfffffULL)
+
+#define DTE_GCR3_INDEX_A 0
+#define DTE_GCR3_INDEX_B 1
+#define DTE_GCR3_INDEX_C 1
+
+#define DTE_GCR3_SHIFT_A 58
+#define DTE_GCR3_SHIFT_B 16
+#define DTE_GCR3_SHIFT_C 43
+
+#define GCR3_VALID 0x01ULL
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
@@ -257,6 +320,7 @@
domain for an IOMMU */
#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
translation */
+#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
@@ -285,6 +349,29 @@ extern bool amd_iommu_iotlb_sup;
#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
+
+/*
+ * This struct is used to pass information about
+ * incoming PPR faults around.
+ */
+struct amd_iommu_fault {
+ u64 address; /* IO virtual address of the fault*/
+ u32 pasid; /* Address space identifier */
+ u16 device_id; /* Originating PCI device id */
+ u16 tag; /* PPR tag */
+ u16 flags; /* Fault flags */
+
+};
+
+#define PPR_FAULT_EXEC (1 << 1)
+#define PPR_FAULT_READ (1 << 2)
+#define PPR_FAULT_WRITE (1 << 5)
+#define PPR_FAULT_USER (1 << 6)
+#define PPR_FAULT_RSVD (1 << 7)
+#define PPR_FAULT_GN (1 << 8)
+
+struct iommu_domain;
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
@@ -297,11 +384,15 @@ struct protection_domain {
u16 id; /* the domain id written to the device table */
int mode; /* paging mode (0-6 levels) */
u64 *pt_root; /* page table root pointer */
+ int glx; /* Number of levels for GCR3 table */
+ u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
bool updated; /* complete domain flush required */
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
void *priv; /* private data */
+ struct iommu_domain *iommu_domain; /* Pointer to generic
+ domain structure */
};
@@ -315,10 +406,15 @@ struct iommu_dev_data {
struct protection_domain *domain; /* Domain the device is bound to */
atomic_t bind; /* Domain attach reference count */
u16 devid; /* PCI Device ID */
+ bool iommu_v2; /* Device can make use of IOMMUv2 */
+ bool passthrough; /* Default for device is pt_domain */
struct {
bool enabled;
int qdep;
} ats; /* ATS state */
+ bool pri_tlp; /* PASID TLB required for
+ PPR completions */
+ u32 errata; /* Bitmap for errata to apply */
};
/*
@@ -399,6 +495,9 @@ struct amd_iommu {
/* Extended features */
u64 features;
+ /* IOMMUv2 */
+ bool is_iommu_v2;
+
/*
* Capability pointer. There could be more than one IOMMU per PCI
* device function if there are more than one AMD IOMMU capability
@@ -431,6 +530,9 @@ struct amd_iommu {
/* MSI number for event interrupt */
u16 evt_msi_num;
+ /* Base of the PPR log, if present */
+ u8 *ppr_log;
+
/* true if interrupts for this IOMMU are already enabled */
bool int_enabled;
@@ -484,7 +586,7 @@ extern struct list_head amd_iommu_pd_list;
* Structure defining one entry in the device table
*/
struct dev_table_entry {
- u32 data[8];
+ u64 data[4];
};
/*
@@ -549,6 +651,16 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap;
*/
extern bool amd_iommu_unmap_flush;
+/* Smallest number of PASIDs supported by any IOMMU in the system */
+extern u32 amd_iommu_max_pasids;
+
+extern bool amd_iommu_v2_present;
+
+extern bool amd_iommu_force_isolation;
+
+/* Max levels of glxval supported */
+extern int amd_iommu_max_glx_val;
+
/* takes bus and device/function and returns the device id
* FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
new file mode 100644
index 000000000000..8add9f125d3e
--- /dev/null
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -0,0 +1,994 @@
+/*
+ * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/mmu_notifier.h>
+#include <linux/amd-iommu.h>
+#include <linux/mm_types.h>
+#include <linux/profile.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/iommu.h>
+#include <linux/wait.h>
+#include <linux/pci.h>
+#include <linux/gfp.h>
+
+#include "amd_iommu_types.h"
+#include "amd_iommu_proto.h"
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
+
+#define MAX_DEVICES 0x10000
+#define PRI_QUEUE_SIZE 512
+
+struct pri_queue {
+ atomic_t inflight;
+ bool finish;
+ int status;
+};
+
+struct pasid_state {
+ struct list_head list; /* For global state-list */
+ atomic_t count; /* Reference count */
+ struct task_struct *task; /* Task bound to this PASID */
+ struct mm_struct *mm; /* mm_struct for the faults */
+ struct mmu_notifier mn; /* mmu_notifier handle */
+ struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
+ struct device_state *device_state; /* Link to our device_state */
+ int pasid; /* PASID index */
+ spinlock_t lock; /* Protect pri_queues */
+ wait_queue_head_t wq; /* To wait for count == 0 */
+};
+
+struct device_state {
+ atomic_t count;
+ struct pci_dev *pdev;
+ struct pasid_state **states;
+ struct iommu_domain *domain;
+ int pasid_levels;
+ int max_pasids;
+ amd_iommu_invalid_ppr_cb inv_ppr_cb;
+ amd_iommu_invalidate_ctx inv_ctx_cb;
+ spinlock_t lock;
+ wait_queue_head_t wq;
+};
+
+struct fault {
+ struct work_struct work;
+ struct device_state *dev_state;
+ struct pasid_state *state;
+ struct mm_struct *mm;
+ u64 address;
+ u16 devid;
+ u16 pasid;
+ u16 tag;
+ u16 finish;
+ u16 flags;
+};
+
+struct device_state **state_table;
+static spinlock_t state_lock;
+
+/* List and lock for all pasid_states */
+static LIST_HEAD(pasid_state_list);
+static DEFINE_SPINLOCK(ps_lock);
+
+static struct workqueue_struct *iommu_wq;
+
+/*
+ * Empty page table - Used between
+ * mmu_notifier_invalidate_range_start and
+ * mmu_notifier_invalidate_range_end
+ */
+static u64 *empty_page_table;
+
+static void free_pasid_states(struct device_state *dev_state);
+static void unbind_pasid(struct device_state *dev_state, int pasid);
+static int task_exit(struct notifier_block *nb, unsigned long e, void *data);
+
+static u16 device_id(struct pci_dev *pdev)
+{
+ u16 devid;
+
+ devid = pdev->bus->number;
+ devid = (devid << 8) | pdev->devfn;
+
+ return devid;
+}
+
+static struct device_state *get_device_state(u16 devid)
+{
+ struct device_state *dev_state;
+ unsigned long flags;
+
+ spin_lock_irqsave(&state_lock, flags);
+ dev_state = state_table[devid];
+ if (dev_state != NULL)
+ atomic_inc(&dev_state->count);
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ return dev_state;
+}
+
+static void free_device_state(struct device_state *dev_state)
+{
+ /*
+ * First detach device from domain - No more PRI requests will arrive
+ * from that device after it is unbound from the IOMMUv2 domain.
+ */
+ iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
+
+ /* Everything is down now, free the IOMMUv2 domain */
+ iommu_domain_free(dev_state->domain);
+
+ /* Finally get rid of the device-state */
+ kfree(dev_state);
+}
+
+static void put_device_state(struct device_state *dev_state)
+{
+ if (atomic_dec_and_test(&dev_state->count))
+ wake_up(&dev_state->wq);
+}
+
+static void put_device_state_wait(struct device_state *dev_state)
+{
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
+ if (!atomic_dec_and_test(&dev_state->count))
+ schedule();
+ finish_wait(&dev_state->wq, &wait);
+
+ free_device_state(dev_state);
+}
+
+static struct notifier_block profile_nb = {
+ .notifier_call = task_exit,
+};
+
+static void link_pasid_state(struct pasid_state *pasid_state)
+{
+ spin_lock(&ps_lock);
+ list_add_tail(&pasid_state->list, &pasid_state_list);
+ spin_unlock(&ps_lock);
+}
+
+static void __unlink_pasid_state(struct pasid_state *pasid_state)
+{
+ list_del(&pasid_state->list);
+}
+
+static void unlink_pasid_state(struct pasid_state *pasid_state)
+{
+ spin_lock(&ps_lock);
+ __unlink_pasid_state(pasid_state);
+ spin_unlock(&ps_lock);
+}
+
+/* Must be called under dev_state->lock */
+static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
+ int pasid, bool alloc)
+{
+ struct pasid_state **root, **ptr;
+ int level, index;
+
+ level = dev_state->pasid_levels;
+ root = dev_state->states;
+
+ while (true) {
+
+ index = (pasid >> (9 * level)) & 0x1ff;
+ ptr = &root[index];
+
+ if (level == 0)
+ break;
+
+ if (*ptr == NULL) {
+ if (!alloc)
+ return NULL;
+
+ *ptr = (void *)get_zeroed_page(GFP_ATOMIC);
+ if (*ptr == NULL)
+ return NULL;
+ }
+
+ root = (struct pasid_state **)*ptr;
+ level -= 1;
+ }
+
+ return ptr;
+}
+
+static int set_pasid_state(struct device_state *dev_state,
+ struct pasid_state *pasid_state,
+ int pasid)
+{
+ struct pasid_state **ptr;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&dev_state->lock, flags);
+ ptr = __get_pasid_state_ptr(dev_state, pasid, true);
+
+ ret = -ENOMEM;
+ if (ptr == NULL)
+ goto out_unlock;
+
+ ret = -ENOMEM;
+ if (*ptr != NULL)
+ goto out_unlock;
+
+ *ptr = pasid_state;
+
+ ret = 0;
+
+out_unlock:
+ spin_unlock_irqrestore(&dev_state->lock, flags);
+
+ return ret;
+}
+
+static void clear_pasid_state(struct device_state *dev_state, int pasid)
+{
+ struct pasid_state **ptr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev_state->lock, flags);
+ ptr = __get_pasid_state_ptr(dev_state, pasid, true);
+
+ if (ptr == NULL)
+ goto out_unlock;
+
+ *ptr = NULL;
+
+out_unlock:
+ spin_unlock_irqrestore(&dev_state->lock, flags);
+}
+
+static struct pasid_state *get_pasid_state(struct device_state *dev_state,
+ int pasid)
+{
+ struct pasid_state **ptr, *ret = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev_state->lock, flags);
+ ptr = __get_pasid_state_ptr(dev_state, pasid, false);
+
+ if (ptr == NULL)
+ goto out_unlock;
+
+ ret = *ptr;
+ if (ret)
+ atomic_inc(&ret->count);
+
+out_unlock:
+ spin_unlock_irqrestore(&dev_state->lock, flags);
+
+ return ret;
+}
+
+static void free_pasid_state(struct pasid_state *pasid_state)
+{
+ kfree(pasid_state);
+}
+
+static void put_pasid_state(struct pasid_state *pasid_state)
+{
+ if (atomic_dec_and_test(&pasid_state->count)) {
+ put_device_state(pasid_state->device_state);
+ wake_up(&pasid_state->wq);
+ }
+}
+
+static void put_pasid_state_wait(struct pasid_state *pasid_state)
+{
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);
+
+ if (atomic_dec_and_test(&pasid_state->count))
+ put_device_state(pasid_state->device_state);
+ else
+ schedule();
+
+ finish_wait(&pasid_state->wq, &wait);
+ mmput(pasid_state->mm);
+ free_pasid_state(pasid_state);
+}
+
+static void __unbind_pasid(struct pasid_state *pasid_state)
+{
+ struct iommu_domain *domain;
+
+ domain = pasid_state->device_state->domain;
+
+ amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
+ clear_pasid_state(pasid_state->device_state, pasid_state->pasid);
+
+ /* Make sure no more pending faults are in the queue */
+ flush_workqueue(iommu_wq);
+
+ mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+
+ put_pasid_state(pasid_state); /* Reference taken in bind() function */
+}
+
+static void unbind_pasid(struct device_state *dev_state, int pasid)
+{
+ struct pasid_state *pasid_state;
+
+ pasid_state = get_pasid_state(dev_state, pasid);
+ if (pasid_state == NULL)
+ return;
+
+ unlink_pasid_state(pasid_state);
+ __unbind_pasid(pasid_state);
+ put_pasid_state_wait(pasid_state); /* Reference taken in this function */
+}
+
+static void free_pasid_states_level1(struct pasid_state **tbl)
+{
+ int i;
+
+ for (i = 0; i < 512; ++i) {
+ if (tbl[i] == NULL)
+ continue;
+
+ free_page((unsigned long)tbl[i]);
+ }
+}
+
+static void free_pasid_states_level2(struct pasid_state **tbl)
+{
+ struct pasid_state **ptr;
+ int i;
+
+ for (i = 0; i < 512; ++i) {
+ if (tbl[i] == NULL)
+ continue;
+
+ ptr = (struct pasid_state **)tbl[i];
+ free_pasid_states_level1(ptr);
+ }
+}
+
+static void free_pasid_states(struct device_state *dev_state)
+{
+ struct pasid_state *pasid_state;
+ int i;
+
+ for (i = 0; i < dev_state->max_pasids; ++i) {
+ pasid_state = get_pasid_state(dev_state, i);
+ if (pasid_state == NULL)
+ continue;
+
+ put_pasid_state(pasid_state);
+ unbind_pasid(dev_state, i);
+ }
+
+ if (dev_state->pasid_levels == 2)
+ free_pasid_states_level2(dev_state->states);
+ else if (dev_state->pasid_levels == 1)
+ free_pasid_states_level1(dev_state->states);
+ else if (dev_state->pasid_levels != 0)
+ BUG();
+
+ free_page((unsigned long)dev_state->states);
+}
+
+static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
+{
+ return container_of(mn, struct pasid_state, mn);
+}
+
+static void __mn_flush_page(struct mmu_notifier *mn,
+ unsigned long address)
+{
+ struct pasid_state *pasid_state;
+ struct device_state *dev_state;
+
+ pasid_state = mn_to_state(mn);
+ dev_state = pasid_state->device_state;
+
+ amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
+}
+
+static int mn_clear_flush_young(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ __mn_flush_page(mn, address);
+
+ return 0;
+}
+
+static void mn_change_pte(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address,
+ pte_t pte)
+{
+ __mn_flush_page(mn, address);
+}
+
+static void mn_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ __mn_flush_page(mn, address);
+}
+
+static void mn_invalidate_range_start(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct pasid_state *pasid_state;
+ struct device_state *dev_state;
+
+ pasid_state = mn_to_state(mn);
+ dev_state = pasid_state->device_state;
+
+ amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
+ __pa(empty_page_table));
+}
+
+static void mn_invalidate_range_end(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct pasid_state *pasid_state;
+ struct device_state *dev_state;
+
+ pasid_state = mn_to_state(mn);
+ dev_state = pasid_state->device_state;
+
+ amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
+ __pa(pasid_state->mm->pgd));
+}
+
+static struct mmu_notifier_ops iommu_mn = {
+ .clear_flush_young = mn_clear_flush_young,
+ .change_pte = mn_change_pte,
+ .invalidate_page = mn_invalidate_page,
+ .invalidate_range_start = mn_invalidate_range_start,
+ .invalidate_range_end = mn_invalidate_range_end,
+};
+
+static void set_pri_tag_status(struct pasid_state *pasid_state,
+ u16 tag, int status)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pasid_state->lock, flags);
+ pasid_state->pri[tag].status = status;
+ spin_unlock_irqrestore(&pasid_state->lock, flags);
+}
+
+static void finish_pri_tag(struct device_state *dev_state,
+ struct pasid_state *pasid_state,
+ u16 tag)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pasid_state->lock, flags);
+ if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
+ pasid_state->pri[tag].finish) {
+ amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
+ pasid_state->pri[tag].status, tag);
+ pasid_state->pri[tag].finish = false;
+ pasid_state->pri[tag].status = PPR_SUCCESS;
+ }
+ spin_unlock_irqrestore(&pasid_state->lock, flags);
+}
+
+static void do_fault(struct work_struct *work)
+{
+ struct fault *fault = container_of(work, struct fault, work);
+ int npages, write;
+ struct page *page;
+
+ write = !!(fault->flags & PPR_FAULT_WRITE);
+
+ npages = get_user_pages(fault->state->task, fault->state->mm,
+ fault->address, 1, write, 0, &page, NULL);
+
+ if (npages == 1) {
+ put_page(page);
+ } else if (fault->dev_state->inv_ppr_cb) {
+ int status;
+
+ status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
+ fault->pasid,
+ fault->address,
+ fault->flags);
+ switch (status) {
+ case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
+ set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
+ break;
+ case AMD_IOMMU_INV_PRI_RSP_INVALID:
+ set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+ break;
+ case AMD_IOMMU_INV_PRI_RSP_FAIL:
+ set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
+ break;
+ default:
+ BUG();
+ }
+ } else {
+ set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+ }
+
+ finish_pri_tag(fault->dev_state, fault->state, fault->tag);
+
+ put_pasid_state(fault->state);
+
+ kfree(fault);
+}
+
+static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
+{
+ struct amd_iommu_fault *iommu_fault;
+ struct pasid_state *pasid_state;
+ struct device_state *dev_state;
+ unsigned long flags;
+ struct fault *fault;
+ bool finish;
+ u16 tag;
+ int ret;
+
+ iommu_fault = data;
+ tag = iommu_fault->tag & 0x1ff;
+ finish = (iommu_fault->tag >> 9) & 1;
+
+ ret = NOTIFY_DONE;
+ dev_state = get_device_state(iommu_fault->device_id);
+ if (dev_state == NULL)
+ goto out;
+
+ pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
+ if (pasid_state == NULL) {
+ /* We know the device but not the PASID -> send INVALID */
+ amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
+ PPR_INVALID, tag);
+ goto out_drop_state;
+ }
+
+ spin_lock_irqsave(&pasid_state->lock, flags);
+ atomic_inc(&pasid_state->pri[tag].inflight);
+ if (finish)
+ pasid_state->pri[tag].finish = true;
+ spin_unlock_irqrestore(&pasid_state->lock, flags);
+
+ fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
+ if (fault == NULL) {
+ /* We are OOM - send success and let the device re-fault */
+ finish_pri_tag(dev_state, pasid_state, tag);
+ goto out_drop_state;
+ }
+
+ fault->dev_state = dev_state;
+ fault->address = iommu_fault->address;
+ fault->state = pasid_state;
+ fault->tag = tag;
+ fault->finish = finish;
+ fault->flags = iommu_fault->flags;
+ INIT_WORK(&fault->work, do_fault);
+
+ queue_work(iommu_wq, &fault->work);
+
+ ret = NOTIFY_OK;
+
+out_drop_state:
+ put_device_state(dev_state);
+
+out:
+ return ret;
+}
+
+static struct notifier_block ppr_nb = {
+ .notifier_call = ppr_notifier,
+};
+
+static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
+{
+ struct pasid_state *pasid_state;
+ struct task_struct *task;
+
+ task = data;
+
+ /*
+ * Using this notifier is a hack - but there is no other choice
+ * at the moment. What I really want is a sleeping notifier that
+ * is called when an MM goes down. But such a notifier doesn't
+ * exist yet. The notifier needs to sleep because it has to make
+ * sure that the device does not use the PASID and the address
+ * space anymore before it is destroyed. This includes waiting
+ * for pending PRI requests to pass the workqueue. The
+ * MMU-Notifiers would be a good fit, but they use RCU and so
+ * they are not allowed to sleep. Let's see how we can solve this
+ * in a more intelligent way in the future.
+ */
+again:
+ spin_lock(&ps_lock);
+ list_for_each_entry(pasid_state, &pasid_state_list, list) {
+ struct device_state *dev_state;
+ int pasid;
+
+ if (pasid_state->task != task)
+ continue;
+
+ /* Drop Lock and unbind */
+ spin_unlock(&ps_lock);
+
+ dev_state = pasid_state->device_state;
+ pasid = pasid_state->pasid;
+
+ if (pasid_state->device_state->inv_ctx_cb)
+ dev_state->inv_ctx_cb(dev_state->pdev, pasid);
+
+ unbind_pasid(dev_state, pasid);
+
+ /* Task may be in the list multiple times */
+ goto again;
+ }
+ spin_unlock(&ps_lock);
+
+ return NOTIFY_OK;
+}
+
+int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+ struct task_struct *task)
+{
+ struct pasid_state *pasid_state;
+ struct device_state *dev_state;
+ u16 devid;
+ int ret;
+
+ might_sleep();
+
+ if (!amd_iommu_v2_supported())
+ return -ENODEV;
+
+ devid = device_id(pdev);
+ dev_state = get_device_state(devid);
+
+ if (dev_state == NULL)
+ return -EINVAL;
+
+ ret = -EINVAL;
+ if (pasid < 0 || pasid >= dev_state->max_pasids)
+ goto out;
+
+ ret = -ENOMEM;
+ pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
+ if (pasid_state == NULL)
+ goto out;
+
+ atomic_set(&pasid_state->count, 1);
+ init_waitqueue_head(&pasid_state->wq);
+ pasid_state->task = task;
+ pasid_state->mm = get_task_mm(task);
+ pasid_state->device_state = dev_state;
+ pasid_state->pasid = pasid;
+ pasid_state->mn.ops = &iommu_mn;
+
+ if (pasid_state->mm == NULL)
+ goto out_free;
+
+ mmu_notifier_register(&pasid_state->mn, pasid_state->mm);
+
+ ret = set_pasid_state(dev_state, pasid_state, pasid);
+ if (ret)
+ goto out_unregister;
+
+ ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
+ __pa(pasid_state->mm->pgd));
+ if (ret)
+ goto out_clear_state;
+
+ link_pasid_state(pasid_state);
+
+ return 0;
+
+out_clear_state:
+ clear_pasid_state(dev_state, pasid);
+
+out_unregister:
+ mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+
+out_free:
+ free_pasid_state(pasid_state);
+
+out:
+ put_device_state(dev_state);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_bind_pasid);
+
+void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
+{
+ struct device_state *dev_state;
+ u16 devid;
+
+ might_sleep();
+
+ if (!amd_iommu_v2_supported())
+ return;
+
+ devid = device_id(pdev);
+ dev_state = get_device_state(devid);
+ if (dev_state == NULL)
+ return;
+
+ if (pasid < 0 || pasid >= dev_state->max_pasids)
+ goto out;
+
+ unbind_pasid(dev_state, pasid);
+
+out:
+ put_device_state(dev_state);
+}
+EXPORT_SYMBOL(amd_iommu_unbind_pasid);
+
+int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
+{
+ struct device_state *dev_state;
+ unsigned long flags;
+ int ret, tmp;
+ u16 devid;
+
+ might_sleep();
+
+ if (!amd_iommu_v2_supported())
+ return -ENODEV;
+
+ if (pasids <= 0 || pasids > (PASID_MASK + 1))
+ return -EINVAL;
+
+ devid = device_id(pdev);
+
+ dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
+ if (dev_state == NULL)
+ return -ENOMEM;
+
+ spin_lock_init(&dev_state->lock);
+ init_waitqueue_head(&dev_state->wq);
+ dev_state->pdev = pdev;
+
+ tmp = pasids;
+ for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
+ dev_state->pasid_levels += 1;
+
+ atomic_set(&dev_state->count, 1);
+ dev_state->max_pasids = pasids;
+
+ ret = -ENOMEM;
+ dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
+ if (dev_state->states == NULL)
+ goto out_free_dev_state;
+
+ dev_state->domain = iommu_domain_alloc(&pci_bus_type);
+ if (dev_state->domain == NULL)
+ goto out_free_states;
+
+ amd_iommu_domain_direct_map(dev_state->domain);
+
+ ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
+ if (ret)
+ goto out_free_domain;
+
+ ret = iommu_attach_device(dev_state->domain, &pdev->dev);
+ if (ret != 0)
+ goto out_free_domain;
+
+ spin_lock_irqsave(&state_lock, flags);
+
+ if (state_table[devid] != NULL) {
+ spin_unlock_irqrestore(&state_lock, flags);
+ ret = -EBUSY;
+ goto out_free_domain;
+ }
+
+ state_table[devid] = dev_state;
+
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ return 0;
+
+out_free_domain:
+ iommu_domain_free(dev_state->domain);
+
+out_free_states:
+ free_page((unsigned long)dev_state->states);
+
+out_free_dev_state:
+ kfree(dev_state);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_init_device);
+
+void amd_iommu_free_device(struct pci_dev *pdev)
+{
+ struct device_state *dev_state;
+ unsigned long flags;
+ u16 devid;
+
+ if (!amd_iommu_v2_supported())
+ return;
+
+ devid = device_id(pdev);
+
+ spin_lock_irqsave(&state_lock, flags);
+
+ dev_state = state_table[devid];
+ if (dev_state == NULL) {
+ spin_unlock_irqrestore(&state_lock, flags);
+ return;
+ }
+
+ state_table[devid] = NULL;
+
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ /* Get rid of any remaining pasid states */
+ free_pasid_states(dev_state);
+
+ put_device_state_wait(dev_state);
+}
+EXPORT_SYMBOL(amd_iommu_free_device);
+
+int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
+ amd_iommu_invalid_ppr_cb cb)
+{
+ struct device_state *dev_state;
+ unsigned long flags;
+ u16 devid;
+ int ret;
+
+ if (!amd_iommu_v2_supported())
+ return -ENODEV;
+
+ devid = device_id(pdev);
+
+ spin_lock_irqsave(&state_lock, flags);
+
+ ret = -EINVAL;
+ dev_state = state_table[devid];
+ if (dev_state == NULL)
+ goto out_unlock;
+
+ dev_state->inv_ppr_cb = cb;
+
+ ret = 0;
+
+out_unlock:
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
+
+int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
+ amd_iommu_invalidate_ctx cb)
+{
+ struct device_state *dev_state;
+ unsigned long flags;
+ u16 devid;
+ int ret;
+
+ if (!amd_iommu_v2_supported())
+ return -ENODEV;
+
+ devid = device_id(pdev);
+
+ spin_lock_irqsave(&state_lock, flags);
+
+ ret = -EINVAL;
+ dev_state = state_table[devid];
+ if (dev_state == NULL)
+ goto out_unlock;
+
+ dev_state->inv_ctx_cb = cb;
+
+ ret = 0;
+
+out_unlock:
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
+
+static int __init amd_iommu_v2_init(void)
+{
+ size_t state_table_size;
+ int ret;
+
+ pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
+
+ spin_lock_init(&state_lock);
+
+ state_table_size = MAX_DEVICES * sizeof(struct device_state *);
+ state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(state_table_size));
+ if (state_table == NULL)
+ return -ENOMEM;
+
+ ret = -ENOMEM;
+ iommu_wq = create_workqueue("amd_iommu_v2");
+ if (iommu_wq == NULL)
+ goto out_free;
+
+ ret = -ENOMEM;
+ empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
+ if (empty_page_table == NULL)
+ goto out_destroy_wq;
+
+ amd_iommu_register_ppr_notifier(&ppr_nb);
+ profile_event_register(PROFILE_TASK_EXIT, &profile_nb);
+
+ return 0;
+
+out_destroy_wq:
+ destroy_workqueue(iommu_wq);
+
+out_free:
+ free_pages((unsigned long)state_table, get_order(state_table_size));
+
+ return ret;
+}
+
+static void __exit amd_iommu_v2_exit(void)
+{
+ struct device_state *dev_state;
+ size_t state_table_size;
+ int i;
+
+ profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
+ amd_iommu_unregister_ppr_notifier(&ppr_nb);
+
+ flush_workqueue(iommu_wq);
+
+ /*
+ * The loop below might call flush_workqueue(), so call
+ * destroy_workqueue() after it
+ */
+ for (i = 0; i < MAX_DEVICES; ++i) {
+ dev_state = get_device_state(i);
+
+ if (dev_state == NULL)
+ continue;
+
+ WARN_ON_ONCE(1);
+
+ put_device_state(dev_state);
+ amd_iommu_free_device(dev_state->pdev);
+ }
+
+ destroy_workqueue(iommu_wq);
+
+ state_table_size = MAX_DEVICES * sizeof(struct device_state *);
+ free_pages((unsigned long)state_table, get_order(state_table_size));
+
+ free_page((unsigned long)empty_page_table);
+}
+
+module_init(amd_iommu_v2_init);
+module_exit(amd_iommu_v2_exit);
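The exported entry points above (amd_iommu_init_device(), amd_iommu_bind_pasid(), the PPR/invalidate-context callback setters, and their teardown counterparts) are meant to be driven by a device driver, not by core code. The sketch below shows one plausible calling sequence under stated assumptions: the driver, the PASID count and number, the header <linux/amd-iommu.h> and the exact callback signatures are illustrative, inferred from how the callbacks are invoked in do_fault() and task_exit() above.

/* Hypothetical IOMMUv2 client driver -- a sketch, not part of the patch. */
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/amd-iommu.h>	/* assumed location of the exported API */

static int my_ppr_cb(struct pci_dev *pdev, int pasid,
		     unsigned long address, u16 flags)
{
	/* Called for PPR faults that get_user_pages() could not resolve. */
	return AMD_IOMMU_INV_PRI_RSP_INVALID;
}

static void my_ctx_cb(struct pci_dev *pdev, int pasid)
{
	/* The task bound to 'pasid' exited; stop queueing work for it. */
}

static int my_driver_enable_pasid(struct pci_dev *pdev)
{
	int ret;

	/* Reserve room for up to 16 PASIDs on this device (arbitrary). */
	ret = amd_iommu_init_device(pdev, 16);
	if (ret)
		return ret;

	amd_iommu_set_invalid_ppr_cb(pdev, my_ppr_cb);
	amd_iommu_set_invalidate_ctx_cb(pdev, my_ctx_cb);

	/* Bind PASID 1 to the current task's address space. */
	ret = amd_iommu_bind_pasid(pdev, 1, current);
	if (ret)
		amd_iommu_free_device(pdev);

	return ret;
}

static void my_driver_disable_pasid(struct pci_dev *pdev)
{
	amd_iommu_unbind_pasid(pdev, 1);
	amd_iommu_free_device(pdev);	/* also tears down remaining PASID state */
}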
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 6dcc7e2d54de..35c1e17fce1d 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -46,7 +46,7 @@
*/
LIST_HEAD(dmar_drhd_units);
-static struct acpi_table_header * __initdata dmar_tbl;
+struct acpi_table_header * __initdata dmar_tbl;
static acpi_size dmar_tbl_size;
static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
@@ -118,8 +118,8 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
return 0;
}
-static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
- struct pci_dev ***devices, u16 segment)
+int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
+ struct pci_dev ***devices, u16 segment)
{
struct acpi_dmar_device_scope *scope;
void * tmp = start;
@@ -217,133 +217,6 @@ static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
return ret;
}
-#ifdef CONFIG_DMAR
-LIST_HEAD(dmar_rmrr_units);
-
-static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
-{
- list_add(&rmrr->list, &dmar_rmrr_units);
-}
-
-
-static int __init
-dmar_parse_one_rmrr(struct acpi_dmar_header *header)
-{
- struct acpi_dmar_reserved_memory *rmrr;
- struct dmar_rmrr_unit *rmrru;
-
- rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
- if (!rmrru)
- return -ENOMEM;
-
- rmrru->hdr = header;
- rmrr = (struct acpi_dmar_reserved_memory *)header;
- rmrru->base_address = rmrr->base_address;
- rmrru->end_address = rmrr->end_address;
-
- dmar_register_rmrr_unit(rmrru);
- return 0;
-}
-
-static int __init
-rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
-{
- struct acpi_dmar_reserved_memory *rmrr;
- int ret;
-
- rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
- ret = dmar_parse_dev_scope((void *)(rmrr + 1),
- ((void *)rmrr) + rmrr->header.length,
- &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
-
- if (ret || (rmrru->devices_cnt == 0)) {
- list_del(&rmrru->list);
- kfree(rmrru);
- }
- return ret;
-}
-
-static LIST_HEAD(dmar_atsr_units);
-
-static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
-{
- struct acpi_dmar_atsr *atsr;
- struct dmar_atsr_unit *atsru;
-
- atsr = container_of(hdr, struct acpi_dmar_atsr, header);
- atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
- if (!atsru)
- return -ENOMEM;
-
- atsru->hdr = hdr;
- atsru->include_all = atsr->flags & 0x1;
-
- list_add(&atsru->list, &dmar_atsr_units);
-
- return 0;
-}
-
-static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
-{
- int rc;
- struct acpi_dmar_atsr *atsr;
-
- if (atsru->include_all)
- return 0;
-
- atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
- rc = dmar_parse_dev_scope((void *)(atsr + 1),
- (void *)atsr + atsr->header.length,
- &atsru->devices_cnt, &atsru->devices,
- atsr->segment);
- if (rc || !atsru->devices_cnt) {
- list_del(&atsru->list);
- kfree(atsru);
- }
-
- return rc;
-}
-
-int dmar_find_matched_atsr_unit(struct pci_dev *dev)
-{
- int i;
- struct pci_bus *bus;
- struct acpi_dmar_atsr *atsr;
- struct dmar_atsr_unit *atsru;
-
- dev = pci_physfn(dev);
-
- list_for_each_entry(atsru, &dmar_atsr_units, list) {
- atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
- if (atsr->segment == pci_domain_nr(dev->bus))
- goto found;
- }
-
- return 0;
-
-found:
- for (bus = dev->bus; bus; bus = bus->parent) {
- struct pci_dev *bridge = bus->self;
-
- if (!bridge || !pci_is_pcie(bridge) ||
- bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
- return 0;
-
- if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
- for (i = 0; i < atsru->devices_cnt; i++)
- if (atsru->devices[i] == bridge)
- return 1;
- break;
- }
- }
-
- if (atsru->include_all)
- return 1;
-
- return 0;
-}
-#endif
-
#ifdef CONFIG_ACPI_NUMA
static int __init
dmar_parse_one_rhsa(struct acpi_dmar_header *header)
@@ -484,14 +357,10 @@ parse_dmar_table(void)
ret = dmar_parse_one_drhd(entry_header);
break;
case ACPI_DMAR_TYPE_RESERVED_MEMORY:
-#ifdef CONFIG_DMAR
ret = dmar_parse_one_rmrr(entry_header);
-#endif
break;
case ACPI_DMAR_TYPE_ATSR:
-#ifdef CONFIG_DMAR
ret = dmar_parse_one_atsr(entry_header);
-#endif
break;
case ACPI_DMAR_HARDWARE_AFFINITY:
#ifdef CONFIG_ACPI_NUMA
@@ -557,34 +426,31 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev)
int __init dmar_dev_scope_init(void)
{
+ static int dmar_dev_scope_initialized;
struct dmar_drhd_unit *drhd, *drhd_n;
int ret = -ENODEV;
+ if (dmar_dev_scope_initialized)
+ return dmar_dev_scope_initialized;
+
+ if (list_empty(&dmar_drhd_units))
+ goto fail;
+
list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
ret = dmar_parse_dev(drhd);
if (ret)
- return ret;
+ goto fail;
}
-#ifdef CONFIG_DMAR
- {
- struct dmar_rmrr_unit *rmrr, *rmrr_n;
- struct dmar_atsr_unit *atsr, *atsr_n;
-
- list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
- ret = rmrr_parse_dev(rmrr);
- if (ret)
- return ret;
- }
+ ret = dmar_parse_rmrr_atsr_dev();
+ if (ret)
+ goto fail;
- list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
- ret = atsr_parse_dev(atsr);
- if (ret)
- return ret;
- }
- }
-#endif
+ dmar_dev_scope_initialized = 1;
+ return 0;
+fail:
+ dmar_dev_scope_initialized = ret;
return ret;
}
@@ -611,14 +477,6 @@ int __init dmar_table_init(void)
return -ENODEV;
}
-#ifdef CONFIG_DMAR
- if (list_empty(&dmar_rmrr_units))
- printk(KERN_INFO PREFIX "No RMRR found\n");
-
- if (list_empty(&dmar_atsr_units))
- printk(KERN_INFO PREFIX "No ATSR found\n");
-#endif
-
return 0;
}
@@ -682,9 +540,6 @@ int __init check_zero_address(void)
return 1;
failed:
-#ifdef CONFIG_DMAR
- dmar_disabled = 1;
-#endif
return 0;
}
@@ -696,22 +551,21 @@ int __init detect_intel_iommu(void)
if (ret)
ret = check_zero_address();
{
-#ifdef CONFIG_INTR_REMAP
struct acpi_table_dmar *dmar;
dmar = (struct acpi_table_dmar *) dmar_tbl;
- if (ret && cpu_has_x2apic && dmar->flags & 0x1)
+
+ if (ret && intr_remapping_enabled && cpu_has_x2apic &&
+ dmar->flags & 0x1)
printk(KERN_INFO
- "Queued invalidation will be enabled to support "
- "x2apic and Intr-remapping.\n");
-#endif
-#ifdef CONFIG_DMAR
+ "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
+
if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
iommu_detected = 1;
/* Make sure ACS will be enabled */
pci_request_acs();
}
-#endif
+
#ifdef CONFIG_X86
if (ret)
x86_init.iommu.iommu_init = intel_iommu_init;
@@ -758,7 +612,6 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
goto err_unmap;
}
-#ifdef CONFIG_DMAR
agaw = iommu_calculate_agaw(iommu);
if (agaw < 0) {
printk(KERN_ERR
@@ -773,7 +626,6 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->seq_id);
goto err_unmap;
}
-#endif
iommu->agaw = agaw;
iommu->msagaw = msagaw;
@@ -800,7 +652,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
(unsigned long long)iommu->cap,
(unsigned long long)iommu->ecap);
- spin_lock_init(&iommu->register_lock);
+ raw_spin_lock_init(&iommu->register_lock);
drhd->iommu = iommu;
return 0;
@@ -817,9 +669,7 @@ void free_iommu(struct intel_iommu *iommu)
if (!iommu)
return;
-#ifdef CONFIG_DMAR
free_dmar_iommu(iommu);
-#endif
if (iommu->reg)
iounmap(iommu->reg);
@@ -921,11 +771,11 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
restart:
rc = 0;
- spin_lock_irqsave(&qi->q_lock, flags);
+ raw_spin_lock_irqsave(&qi->q_lock, flags);
while (qi->free_cnt < 3) {
- spin_unlock_irqrestore(&qi->q_lock, flags);
+ raw_spin_unlock_irqrestore(&qi->q_lock, flags);
cpu_relax();
- spin_lock_irqsave(&qi->q_lock, flags);
+ raw_spin_lock_irqsave(&qi->q_lock, flags);
}
index = qi->free_head;
@@ -965,15 +815,15 @@ restart:
if (rc)
break;
- spin_unlock(&qi->q_lock);
+ raw_spin_unlock(&qi->q_lock);
cpu_relax();
- spin_lock(&qi->q_lock);
+ raw_spin_lock(&qi->q_lock);
}
qi->desc_status[index] = QI_DONE;
reclaim_free_desc(qi);
- spin_unlock_irqrestore(&qi->q_lock, flags);
+ raw_spin_unlock_irqrestore(&qi->q_lock, flags);
if (rc == -EAGAIN)
goto restart;
@@ -1062,7 +912,7 @@ void dmar_disable_qi(struct intel_iommu *iommu)
if (!ecap_qis(iommu->ecap))
return;
- spin_lock_irqsave(&iommu->register_lock, flags);
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
if (!(sts & DMA_GSTS_QIES))
@@ -1082,7 +932,7 @@ void dmar_disable_qi(struct intel_iommu *iommu)
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
!(sts & DMA_GSTS_QIES), sts);
end:
- spin_unlock_irqrestore(&iommu->register_lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
/*
@@ -1097,7 +947,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
qi->free_head = qi->free_tail = 0;
qi->free_cnt = QI_LENGTH;
- spin_lock_irqsave(&iommu->register_lock, flags);
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
/* write zero to the tail reg */
writel(0, iommu->reg + DMAR_IQT_REG);
@@ -1110,7 +960,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
/* Make sure hardware complete it */
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
- spin_unlock_irqrestore(&iommu->register_lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
/*
@@ -1159,7 +1009,7 @@ int dmar_enable_qi(struct intel_iommu *iommu)
qi->free_head = qi->free_tail = 0;
qi->free_cnt = QI_LENGTH;
- spin_lock_init(&qi->q_lock);
+ raw_spin_lock_init(&qi->q_lock);
__dmar_enable_qi(iommu);
@@ -1225,11 +1075,11 @@ void dmar_msi_unmask(struct irq_data *data)
unsigned long flag;
/* unmask it */
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
writel(0, iommu->reg + DMAR_FECTL_REG);
/* Read a reg to force flush the post write */
readl(iommu->reg + DMAR_FECTL_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
void dmar_msi_mask(struct irq_data *data)
@@ -1238,11 +1088,11 @@ void dmar_msi_mask(struct irq_data *data)
struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
/* mask it */
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
/* Read a reg to force flush the post write */
readl(iommu->reg + DMAR_FECTL_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
void dmar_msi_write(int irq, struct msi_msg *msg)
@@ -1250,11 +1100,11 @@ void dmar_msi_write(int irq, struct msi_msg *msg)
struct intel_iommu *iommu = irq_get_handler_data(irq);
unsigned long flag;
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
void dmar_msi_read(int irq, struct msi_msg *msg)
@@ -1262,11 +1112,11 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
struct intel_iommu *iommu = irq_get_handler_data(irq);
unsigned long flag;
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
@@ -1303,7 +1153,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
u32 fault_status;
unsigned long flag;
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
fault_status = readl(iommu->reg + DMAR_FSTS_REG);
if (fault_status)
printk(KERN_ERR "DRHD: handling fault status reg %x\n",
@@ -1342,7 +1192,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
writel(DMA_FRCD_F, iommu->reg + reg +
fault_index * PRIMARY_FAULT_REG_LEN + 12);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
dmar_fault_do_one(iommu, type, fault_reason,
source_id, guest_addr);
@@ -1350,14 +1200,14 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
fault_index++;
if (fault_index >= cap_num_fault_regs(iommu->cap))
fault_index = 0;
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
}
clear_rest:
/* clear all the other faults */
fault_status = readl(iommu->reg + DMAR_FSTS_REG);
writel(fault_status, iommu->reg + DMAR_FSTS_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
return IRQ_HANDLED;
}
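A note on the lock conversions that run through the dmar.c hunks above: register_lock and the invalidation queue's q_lock are taken from interrupt handlers and low-level invalidation paths that must never sleep, so they become raw_spinlock_t, which remains a true spinning lock even on preempt-rt configurations where ordinary spinlocks may sleep. The calling pattern is unchanged apart from the type and the raw_ prefix; a minimal illustrative sketch of the converted form:

/* Illustrative only -- mirrors the raw_spin_lock_irqsave() pattern above. */
#include <linux/spinlock.h>
#include <linux/io.h>

static DEFINE_RAW_SPINLOCK(example_reg_lock);

static void example_program_register(void __iomem *reg, u32 val)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_reg_lock, flags);
	writel(val, reg);	/* short, non-sleeping critical section */
	raw_spin_unlock_irqrestore(&example_reg_lock, flags);
}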
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a88f3cbb100b..77e3b917c8da 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -24,6 +24,7 @@
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
+#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
@@ -77,6 +78,24 @@
#define LEVEL_STRIDE (9)
#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
+/*
+ * This bitmap is used to advertise the page sizes our hardware supports
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * Traditionally the IOMMU core just handed us the mappings directly,
+ * after making sure the size is an order of a 4KiB page and that the
+ * mapping has natural alignment.
+ *
+ * To retain this behavior, we currently advertise that we support
+ * all page sizes that are an order of 4KiB.
+ *
+ * If at some point we'd like to utilize the IOMMU core's new behavior,
+ * we could change this to advertise the real page sizes we support.
+ */
+#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
+
static inline int agaw_to_level(int agaw)
{
return agaw + 2;
@@ -398,11 +417,14 @@ static long list_size;
static void domain_remove_dev_info(struct dmar_domain *domain);
-#ifdef CONFIG_DMAR_DEFAULT_ON
+#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
-#endif /*CONFIG_DMAR_DEFAULT_ON*/
+#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
+
+int intel_iommu_enabled = 0;
+EXPORT_SYMBOL_GPL(intel_iommu_enabled);
static int dmar_map_gfx = 1;
static int dmar_forcedac;
@@ -939,7 +961,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
addr = iommu->root_entry;
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
@@ -948,7 +970,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
readl, (sts & DMA_GSTS_RTPS), sts);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
static void iommu_flush_write_buffer(struct intel_iommu *iommu)
@@ -959,14 +981,14 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu)
if (!rwbf_quirk && !cap_rwbf(iommu->cap))
return;
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
/* Make sure hardware complete it */
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
readl, (!(val & DMA_GSTS_WBFS)), val);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
/* return value determine if we need a write buffer flush */
@@ -993,14 +1015,14 @@ static void __iommu_flush_context(struct intel_iommu *iommu,
}
val |= DMA_CCMD_ICC;
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
/* Make sure hardware complete it */
IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
dmar_readq, (!(val & DMA_CCMD_ICC)), val);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
/* return value determine if we need a write buffer flush */
@@ -1039,7 +1061,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
if (cap_write_drain(iommu->cap))
val |= DMA_TLB_WRITE_DRAIN;
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
/* Note: Only uses first TLB reg currently */
if (val_iva)
dmar_writeq(iommu->reg + tlb_offset, val_iva);
@@ -1049,7 +1071,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
IOMMU_WAIT_OP(iommu, tlb_offset + 8,
dmar_readq, (!(val & DMA_TLB_IVT)), val);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
/* check IOTLB invalidation granularity */
if (DMA_TLB_IAIG(val) == 0)
@@ -1165,7 +1187,7 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
u32 pmen;
unsigned long flags;
- spin_lock_irqsave(&iommu->register_lock, flags);
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
pmen = readl(iommu->reg + DMAR_PMEN_REG);
pmen &= ~DMA_PMEN_EPM;
writel(pmen, iommu->reg + DMAR_PMEN_REG);
@@ -1174,7 +1196,7 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
readl, !(pmen & DMA_PMEN_PRS), pmen);
- spin_unlock_irqrestore(&iommu->register_lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
static int iommu_enable_translation(struct intel_iommu *iommu)
@@ -1182,7 +1204,7 @@ static int iommu_enable_translation(struct intel_iommu *iommu)
u32 sts;
unsigned long flags;
- spin_lock_irqsave(&iommu->register_lock, flags);
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
iommu->gcmd |= DMA_GCMD_TE;
writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
@@ -1190,7 +1212,7 @@ static int iommu_enable_translation(struct intel_iommu *iommu)
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
readl, (sts & DMA_GSTS_TES), sts);
- spin_unlock_irqrestore(&iommu->register_lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
return 0;
}
@@ -1199,7 +1221,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
u32 sts;
unsigned long flag;
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
iommu->gcmd &= ~DMA_GCMD_TE;
writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
@@ -1207,7 +1229,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu)
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
readl, (!(sts & DMA_GSTS_TES)), sts);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
return 0;
}
@@ -2157,7 +2179,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
rmrr->end_address);
}
-#ifdef CONFIG_DMAR_FLOPPY_WA
+#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
static inline void iommu_prepare_isa(void)
{
struct pci_dev *pdev;
@@ -2180,7 +2202,7 @@ static inline void iommu_prepare_isa(void)
{
return;
}
-#endif /* !CONFIG_DMAR_FLPY_WA */
+#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
static int md_domain_init(struct dmar_domain *domain, int guest_width);
@@ -2491,7 +2513,7 @@ static int __init init_dmars(void)
if (iommu_pass_through)
iommu_identity_mapping |= IDENTMAP_ALL;
-#ifdef CONFIG_DMAR_BROKEN_GFX_WA
+#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
iommu_identity_mapping |= IDENTMAP_GFX;
#endif
@@ -3329,7 +3351,7 @@ static int iommu_suspend(void)
for_each_active_iommu(iommu, drhd) {
iommu_disable_translation(iommu);
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
iommu->iommu_state[SR_DMAR_FECTL_REG] =
readl(iommu->reg + DMAR_FECTL_REG);
@@ -3340,7 +3362,7 @@ static int iommu_suspend(void)
iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
readl(iommu->reg + DMAR_FEUADDR_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
return 0;
@@ -3367,7 +3389,7 @@ static void iommu_resume(void)
for_each_active_iommu(iommu, drhd) {
- spin_lock_irqsave(&iommu->register_lock, flag);
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
iommu->reg + DMAR_FECTL_REG);
@@ -3378,7 +3400,7 @@ static void iommu_resume(void)
writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
iommu->reg + DMAR_FEUADDR_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flag);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
for_each_active_iommu(iommu, drhd)
@@ -3399,6 +3421,151 @@ static void __init init_iommu_pm_ops(void)
static inline void init_iommu_pm_ops(void) {}
#endif /* CONFIG_PM */
+LIST_HEAD(dmar_rmrr_units);
+
+static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
+{
+ list_add(&rmrr->list, &dmar_rmrr_units);
+}
+
+
+int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
+{
+ struct acpi_dmar_reserved_memory *rmrr;
+ struct dmar_rmrr_unit *rmrru;
+
+ rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
+ if (!rmrru)
+ return -ENOMEM;
+
+ rmrru->hdr = header;
+ rmrr = (struct acpi_dmar_reserved_memory *)header;
+ rmrru->base_address = rmrr->base_address;
+ rmrru->end_address = rmrr->end_address;
+
+ dmar_register_rmrr_unit(rmrru);
+ return 0;
+}
+
+static int __init
+rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
+{
+ struct acpi_dmar_reserved_memory *rmrr;
+ int ret;
+
+ rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
+ ret = dmar_parse_dev_scope((void *)(rmrr + 1),
+ ((void *)rmrr) + rmrr->header.length,
+ &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
+
+ if (ret || (rmrru->devices_cnt == 0)) {
+ list_del(&rmrru->list);
+ kfree(rmrru);
+ }
+ return ret;
+}
+
+static LIST_HEAD(dmar_atsr_units);
+
+int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
+{
+ struct acpi_dmar_atsr *atsr;
+ struct dmar_atsr_unit *atsru;
+
+ atsr = container_of(hdr, struct acpi_dmar_atsr, header);
+ atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
+ if (!atsru)
+ return -ENOMEM;
+
+ atsru->hdr = hdr;
+ atsru->include_all = atsr->flags & 0x1;
+
+ list_add(&atsru->list, &dmar_atsr_units);
+
+ return 0;
+}
+
+static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
+{
+ int rc;
+ struct acpi_dmar_atsr *atsr;
+
+ if (atsru->include_all)
+ return 0;
+
+ atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
+ rc = dmar_parse_dev_scope((void *)(atsr + 1),
+ (void *)atsr + atsr->header.length,
+ &atsru->devices_cnt, &atsru->devices,
+ atsr->segment);
+ if (rc || !atsru->devices_cnt) {
+ list_del(&atsru->list);
+ kfree(atsru);
+ }
+
+ return rc;
+}
+
+int dmar_find_matched_atsr_unit(struct pci_dev *dev)
+{
+ int i;
+ struct pci_bus *bus;
+ struct acpi_dmar_atsr *atsr;
+ struct dmar_atsr_unit *atsru;
+
+ dev = pci_physfn(dev);
+
+ list_for_each_entry(atsru, &dmar_atsr_units, list) {
+ atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
+ if (atsr->segment == pci_domain_nr(dev->bus))
+ goto found;
+ }
+
+ return 0;
+
+found:
+ for (bus = dev->bus; bus; bus = bus->parent) {
+ struct pci_dev *bridge = bus->self;
+
+ if (!bridge || !pci_is_pcie(bridge) ||
+ bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
+ return 0;
+
+ if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
+ for (i = 0; i < atsru->devices_cnt; i++)
+ if (atsru->devices[i] == bridge)
+ return 1;
+ break;
+ }
+ }
+
+ if (atsru->include_all)
+ return 1;
+
+ return 0;
+}
+
+int __init dmar_parse_rmrr_atsr_dev(void)
+{
+ struct dmar_rmrr_unit *rmrr, *rmrr_n;
+ struct dmar_atsr_unit *atsr, *atsr_n;
+ int ret = 0;
+
+ list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
+ ret = rmrr_parse_dev(rmrr);
+ if (ret)
+ return ret;
+ }
+
+ list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
+ ret = atsr_parse_dev(atsr);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
/*
* Here we only respond to action of unbound device from driver.
*
@@ -3448,16 +3615,12 @@ int __init intel_iommu_init(void)
return -ENODEV;
}
- if (dmar_dev_scope_init()) {
+ if (dmar_dev_scope_init() < 0) {
if (force_on)
panic("tboot: Failed to initialize DMAR device scope\n");
return -ENODEV;
}
- /*
- * Check the need for DMA-remapping initialization now.
- * Above initialization will also be used by Interrupt-remapping.
- */
if (no_iommu || dmar_disabled)
return -ENODEV;
@@ -3467,6 +3630,12 @@ int __init intel_iommu_init(void)
return -ENODEV;
}
+ if (list_empty(&dmar_rmrr_units))
+ printk(KERN_INFO "DMAR: No RMRR found\n");
+
+ if (list_empty(&dmar_atsr_units))
+ printk(KERN_INFO "DMAR: No ATSR found\n");
+
if (dmar_init_reserved_ranges()) {
if (force_on)
panic("tboot: Failed to reserve iommu ranges\n");
@@ -3495,10 +3664,12 @@ int __init intel_iommu_init(void)
init_iommu_pm_ops();
- register_iommu(&intel_iommu_ops);
+ bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
bus_register_notifier(&pci_bus_type, &device_nb);
+ intel_iommu_enabled = 1;
+
return 0;
}
@@ -3831,12 +4002,11 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
static int intel_iommu_map(struct iommu_domain *domain,
unsigned long iova, phys_addr_t hpa,
- int gfp_order, int iommu_prot)
+ size_t size, int iommu_prot)
{
struct dmar_domain *dmar_domain = domain->priv;
u64 max_addr;
int prot = 0;
- size_t size;
int ret;
if (iommu_prot & IOMMU_READ)
@@ -3846,7 +4016,6 @@ static int intel_iommu_map(struct iommu_domain *domain,
if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
prot |= DMA_PTE_SNP;
- size = PAGE_SIZE << gfp_order;
max_addr = iova + size;
if (dmar_domain->max_addr < max_addr) {
u64 end;
@@ -3869,11 +4038,10 @@ static int intel_iommu_map(struct iommu_domain *domain,
return ret;
}
-static int intel_iommu_unmap(struct iommu_domain *domain,
- unsigned long iova, int gfp_order)
+static size_t intel_iommu_unmap(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct dmar_domain *dmar_domain = domain->priv;
- size_t size = PAGE_SIZE << gfp_order;
int order;
order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
@@ -3882,7 +4050,7 @@ static int intel_iommu_unmap(struct iommu_domain *domain,
if (dmar_domain->max_addr == iova + size)
dmar_domain->max_addr = iova;
- return order;
+ return PAGE_SIZE << order;
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3912,6 +4080,54 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
return 0;
}
+/*
+ * Group numbers are arbitrary. Devices with the same group number
+ * indicate that the iommu cannot differentiate between them. To avoid
+ * tracking used groups we just use the seg|bus|devfn of the lowest
+ * level at which we're able to differentiate devices.
+ */
+static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_dev *bridge;
+ union {
+ struct {
+ u8 devfn;
+ u8 bus;
+ u16 segment;
+ } pci;
+ u32 group;
+ } id;
+
+ if (iommu_no_mapping(dev))
+ return -ENODEV;
+
+ id.pci.segment = pci_domain_nr(pdev->bus);
+ id.pci.bus = pdev->bus->number;
+ id.pci.devfn = pdev->devfn;
+
+ if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
+ return -ENODEV;
+
+ bridge = pci_find_upstream_pcie_bridge(pdev);
+ if (bridge) {
+ if (pci_is_pcie(bridge)) {
+ id.pci.bus = bridge->subordinate->number;
+ id.pci.devfn = 0;
+ } else {
+ id.pci.bus = bridge->bus->number;
+ id.pci.devfn = bridge->devfn;
+ }
+ }
+
+ if (!pdev->is_virtfn && iommu_group_mf)
+ id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);
+
+ *groupid = id.group;
+
+ return 0;
+}
+
static struct iommu_ops intel_iommu_ops = {
.domain_init = intel_iommu_domain_init,
.domain_destroy = intel_iommu_domain_destroy,
@@ -3921,6 +4137,8 @@ static struct iommu_ops intel_iommu_ops = {
.unmap = intel_iommu_unmap,
.iova_to_phys = intel_iommu_iova_to_phys,
.domain_has_cap = intel_iommu_domain_has_cap,
+ .device_group = intel_iommu_device_group,
+ .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
};
static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
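The device_group callback added to intel-iommu.c above collapses a device to a 32-bit group identifier by packing PCI segment, bus and devfn into a union, after first substituting the nearest upstream PCIe bridge when the device sits behind a legacy bridge (the IOMMU only sees that bridge's requester ID) and, when iommu_group_mf is set, folding the functions of a multifunction device onto function 0. The packing itself amounts to the following hypothetical helper; the real code uses the anonymous struct/union shown above, so the resulting u32 depends on endianness:

/* Sketch of the seg|bus|devfn -> group id packing (assumes little endian). */
static u32 example_group_id(u16 segment, u8 bus, u8 devfn)
{
	return ((u32)segment << 16) | ((u32)bus << 8) | devfn;
}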
diff --git a/drivers/iommu/intr_remapping.c b/drivers/iommu/intr_remapping.c
index 1a89d4a2cadf..6777ca049471 100644
--- a/drivers/iommu/intr_remapping.c
+++ b/drivers/iommu/intr_remapping.c
@@ -21,6 +21,7 @@ int intr_remapping_enabled;
static int disable_intremap;
static int disable_sourceid_checking;
+static int no_x2apic_optout;
static __init int setup_nointremap(char *str)
{
@@ -34,18 +35,26 @@ static __init int setup_intremap(char *str)
if (!str)
return -EINVAL;
- if (!strncmp(str, "on", 2))
- disable_intremap = 0;
- else if (!strncmp(str, "off", 3))
- disable_intremap = 1;
- else if (!strncmp(str, "nosid", 5))
- disable_sourceid_checking = 1;
+ while (*str) {
+ if (!strncmp(str, "on", 2))
+ disable_intremap = 0;
+ else if (!strncmp(str, "off", 3))
+ disable_intremap = 1;
+ else if (!strncmp(str, "nosid", 5))
+ disable_sourceid_checking = 1;
+ else if (!strncmp(str, "no_x2apic_optout", 16))
+ no_x2apic_optout = 1;
+
+ str += strcspn(str, ",");
+ while (*str == ',')
+ str++;
+ }
return 0;
}
early_param("intremap", setup_intremap);
-static DEFINE_SPINLOCK(irq_2_ir_lock);
+static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
{
@@ -62,12 +71,12 @@ int get_irte(int irq, struct irte *entry)
if (!entry || !irq_iommu)
return -1;
- spin_lock_irqsave(&irq_2_ir_lock, flags);
+ raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
index = irq_iommu->irte_index + irq_iommu->sub_handle;
*entry = *(irq_iommu->iommu->ir_table->base + index);
- spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return 0;
}
@@ -101,7 +110,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
return -1;
}
- spin_lock_irqsave(&irq_2_ir_lock, flags);
+ raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
do {
for (i = index; i < index + count; i++)
if (table->base[i].present)
@@ -113,7 +122,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
if (index == start_index) {
- spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
printk(KERN_ERR "can't allocate an IRTE\n");
return -1;
}
@@ -127,7 +136,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
irq_iommu->sub_handle = 0;
irq_iommu->irte_mask = mask;
- spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return index;
}
@@ -152,10 +161,10 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle)
if (!irq_iommu)
return -1;
- spin_lock_irqsave(&irq_2_ir_lock, flags);
+ raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
*sub_handle = irq_iommu->sub_handle;
index = irq_iommu->irte_index;
- spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return index;
}
@@ -167,14 +176,14 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
if (!irq_iommu)
return -1;
- spin_lock_irqsave(&irq_2_ir_lock, flags);
+ raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
irq_iommu->iommu = iommu;
irq_iommu->irte_index = index;
irq_iommu->sub_handle = subhandle;
irq_iommu->irte_mask = 0;
- spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return 0;
}
@@ -190,7 +199,7 @@ int modify_irte(int irq, struct irte *irte_modified)
if (!irq_iommu)
return -1;
- spin_lock_irqsave(&irq_2_ir_lock, flags);
+ raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
iommu = irq_iommu->iommu;
@@ -202,7 +211,7 @@ int modify_irte(int irq, struct irte *irte_modified)
__iommu_flush_cache(iommu, irte, sizeof(*irte));
rc = qi_flush_iec(iommu, index, 0);
- spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return rc;
}
@@ -270,7 +279,7 @@ int free_irte(int irq)
if (!irq_iommu)
return -1;
- spin_lock_irqsave(&irq_2_ir_lock, flags);
+ raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
rc = clear_entries(irq_iommu);
@@ -279,7 +288,7 @@ int free_irte(int irq)
irq_iommu->sub_handle = 0;
irq_iommu->irte_mask = 0;
- spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return rc;
}
@@ -409,7 +418,7 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
addr = virt_to_phys((void *)iommu->ir_table->base);
- spin_lock_irqsave(&iommu->register_lock, flags);
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
dmar_writeq(iommu->reg + DMAR_IRTA_REG,
(addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
@@ -420,7 +429,7 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
readl, (sts & DMA_GSTS_IRTPS), sts);
- spin_unlock_irqrestore(&iommu->register_lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
/*
* global invalidation of interrupt entry cache before enabling
@@ -428,7 +437,7 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
*/
qi_global_iec(iommu);
- spin_lock_irqsave(&iommu->register_lock, flags);
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
/* Enable interrupt-remapping */
iommu->gcmd |= DMA_GCMD_IRE;
@@ -437,7 +446,7 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
readl, (sts & DMA_GSTS_IRES), sts);
- spin_unlock_irqrestore(&iommu->register_lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
@@ -485,7 +494,7 @@ static void iommu_disable_intr_remapping(struct intel_iommu *iommu)
*/
qi_global_iec(iommu);
- spin_lock_irqsave(&iommu->register_lock, flags);
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
if (!(sts & DMA_GSTS_IRES))
@@ -498,7 +507,16 @@ static void iommu_disable_intr_remapping(struct intel_iommu *iommu)
readl, !(sts & DMA_GSTS_IRES), sts);
end:
- spin_unlock_irqrestore(&iommu->register_lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+static int __init dmar_x2apic_optout(void)
+{
+ struct acpi_table_dmar *dmar;
+ dmar = (struct acpi_table_dmar *)dmar_tbl;
+ if (!dmar || no_x2apic_optout)
+ return 0;
+ return dmar->flags & DMAR_X2APIC_OPT_OUT;
}
int __init intr_remapping_supported(void)
@@ -521,16 +539,25 @@ int __init intr_remapping_supported(void)
return 1;
}
-int __init enable_intr_remapping(int eim)
+int __init enable_intr_remapping(void)
{
struct dmar_drhd_unit *drhd;
int setup = 0;
+ int eim = 0;
if (parse_ioapics_under_ir() != 1) {
printk(KERN_INFO "Not enable interrupt remapping\n");
return -1;
}
+ if (x2apic_supported()) {
+ eim = !dmar_x2apic_optout();
+ WARN(!eim, KERN_WARNING
+ "Your BIOS is broken and requested that x2apic be disabled\n"
+ "This will leave your machine vulnerable to irq-injection attacks\n"
+ "Use 'intremap=no_x2apic_optout' to override BIOS request\n");
+ }
+
for_each_drhd_unit(drhd) {
struct intel_iommu *iommu = drhd->iommu;
@@ -606,8 +633,9 @@ int __init enable_intr_remapping(int eim)
goto error;
intr_remapping_enabled = 1;
+ pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
- return 0;
+ return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
error:
/*
@@ -745,6 +773,15 @@ int __init parse_ioapics_under_ir(void)
return ir_supported;
}
+int __init ir_dev_scope_init(void)
+{
+ if (!intr_remapping_enabled)
+ return 0;
+
+ return dmar_dev_scope_init();
+}
+rootfs_initcall(ir_dev_scope_init);
+
void disable_intr_remapping(void)
{
struct dmar_drhd_unit *drhd;
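With the parser rework in intr_remapping.c above, intremap= now accepts a comma-separated list of options rather than a single keyword, and gains no_x2apic_optout to ignore a BIOS that requests x2APIC opt-out. For example (kernel command line, values illustrative):

	intremap=nosid,no_x2apic_optout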
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 6e6b6a11b3ce..2198b2dbbcd3 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -16,6 +16,10 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/device.h>
+#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/module.h>
@@ -23,32 +27,129 @@
#include <linux/errno.h>
#include <linux/iommu.h>
-static struct iommu_ops *iommu_ops;
+static ssize_t show_iommu_group(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ unsigned int groupid;
+
+ if (iommu_device_group(dev, &groupid))
+ return 0;
+
+ return sprintf(buf, "%u", groupid);
+}
+static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL);
+
+static int add_iommu_group(struct device *dev, void *data)
+{
+ unsigned int groupid;
+
+ if (iommu_device_group(dev, &groupid) == 0)
+ return device_create_file(dev, &dev_attr_iommu_group);
+
+ return 0;
+}
+
+static int remove_iommu_group(struct device *dev)
+{
+ unsigned int groupid;
+
+ if (iommu_device_group(dev, &groupid) == 0)
+ device_remove_file(dev, &dev_attr_iommu_group);
+
+ return 0;
+}
+
+static int iommu_device_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ if (action == BUS_NOTIFY_ADD_DEVICE)
+ return add_iommu_group(dev, NULL);
+ else if (action == BUS_NOTIFY_DEL_DEVICE)
+ return remove_iommu_group(dev);
+
+ return 0;
+}
-void register_iommu(struct iommu_ops *ops)
+static struct notifier_block iommu_device_nb = {
+ .notifier_call = iommu_device_notifier,
+};
+
+static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
+{
+ bus_register_notifier(bus, &iommu_device_nb);
+ bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
+}
+
+/**
+ * bus_set_iommu - set iommu-callbacks for the bus
+ * @bus: bus.
+ * @ops: the callbacks provided by the iommu-driver
+ *
+ * This function is called by an iommu driver to set the iommu methods
+ * used for a particular bus. Drivers for devices on that bus can use
+ * the iommu-api after these ops are registered.
+ * This special function is needed because IOMMUs are usually devices on
+ * the bus itself, so the iommu drivers are not initialized when the bus
+ * is set up. With this function the iommu-driver can set the iommu-ops
+ * afterwards.
+ */
+int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops)
{
- if (iommu_ops)
- BUG();
+ if (bus->iommu_ops != NULL)
+ return -EBUSY;
- iommu_ops = ops;
+ bus->iommu_ops = ops;
+
+ /* Do IOMMU specific setup for this bus-type */
+ iommu_bus_init(bus, ops);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bus_set_iommu);
+
+bool iommu_present(struct bus_type *bus)
+{
+ return bus->iommu_ops != NULL;
}
+EXPORT_SYMBOL_GPL(iommu_present);
-bool iommu_found(void)
+/**
+ * iommu_set_fault_handler() - set a fault handler for an iommu domain
+ * @domain: iommu domain
+ * @handler: fault handler
+ *
+ * This function should be used by IOMMU users which want to be notified
+ * whenever an IOMMU fault happens.
+ *
+ * The fault handler itself should return 0 on success, and an appropriate
+ * error code otherwise.
+ */
+void iommu_set_fault_handler(struct iommu_domain *domain,
+ iommu_fault_handler_t handler)
{
- return iommu_ops != NULL;
+ BUG_ON(!domain);
+
+ domain->handler = handler;
}
-EXPORT_SYMBOL_GPL(iommu_found);
+EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
-struct iommu_domain *iommu_domain_alloc(void)
+struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
{
struct iommu_domain *domain;
int ret;
- domain = kmalloc(sizeof(*domain), GFP_KERNEL);
+ if (bus == NULL || bus->iommu_ops == NULL)
+ return NULL;
+
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
- ret = iommu_ops->domain_init(domain);
+ domain->ops = bus->iommu_ops;
+
+ ret = domain->ops->domain_init(domain);
if (ret)
goto out_free;
@@ -63,62 +164,180 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc);
void iommu_domain_free(struct iommu_domain *domain)
{
- iommu_ops->domain_destroy(domain);
+ if (likely(domain->ops->domain_destroy != NULL))
+ domain->ops->domain_destroy(domain);
+
kfree(domain);
}
EXPORT_SYMBOL_GPL(iommu_domain_free);
int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
{
- return iommu_ops->attach_dev(domain, dev);
+ if (unlikely(domain->ops->attach_dev == NULL))
+ return -ENODEV;
+
+ return domain->ops->attach_dev(domain, dev);
}
EXPORT_SYMBOL_GPL(iommu_attach_device);
void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
{
- iommu_ops->detach_dev(domain, dev);
+ if (unlikely(domain->ops->detach_dev == NULL))
+ return;
+
+ domain->ops->detach_dev(domain, dev);
}
EXPORT_SYMBOL_GPL(iommu_detach_device);
phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
unsigned long iova)
{
- return iommu_ops->iova_to_phys(domain, iova);
+ if (unlikely(domain->ops->iova_to_phys == NULL))
+ return 0;
+
+ return domain->ops->iova_to_phys(domain, iova);
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
int iommu_domain_has_cap(struct iommu_domain *domain,
unsigned long cap)
{
- return iommu_ops->domain_has_cap(domain, cap);
+ if (unlikely(domain->ops->domain_has_cap == NULL))
+ return 0;
+
+ return domain->ops->domain_has_cap(domain, cap);
}
EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, int gfp_order, int prot)
+ phys_addr_t paddr, size_t size, int prot)
{
- unsigned long invalid_mask;
- size_t size;
+ unsigned long orig_iova = iova;
+ unsigned int min_pagesz;
+ size_t orig_size = size;
+ int ret = 0;
+
+ if (unlikely(domain->ops->map == NULL))
+ return -ENODEV;
+
+ /* find out the minimum page size supported */
+ min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+ /*
+ * both the virtual address and the physical one, as well as
+ * the size of the mapping, must be aligned (at least) to the
+ * size of the smallest page supported by the hardware
+ */
+ if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+ pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz "
+ "0x%x\n", iova, (unsigned long)paddr,
+ (unsigned long)size, min_pagesz);
+ return -EINVAL;
+ }
+
+ pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova,
+ (unsigned long)paddr, (unsigned long)size);
+
+ while (size) {
+ unsigned long pgsize, addr_merge = iova | paddr;
+ unsigned int pgsize_idx;
+
+ /* Max page size that still fits into 'size' */
+ pgsize_idx = __fls(size);
+
+ /* need to consider alignment requirements ? */
+ if (likely(addr_merge)) {
+ /* Max page size allowed by both iova and paddr */
+ unsigned int align_pgsize_idx = __ffs(addr_merge);
+
+ pgsize_idx = min(pgsize_idx, align_pgsize_idx);
+ }
- size = 0x1000UL << gfp_order;
- invalid_mask = size - 1;
+ /* build a mask of acceptable page sizes */
+ pgsize = (1UL << (pgsize_idx + 1)) - 1;
- BUG_ON((iova | paddr) & invalid_mask);
+ /* throw away page sizes not supported by the hardware */
+ pgsize &= domain->ops->pgsize_bitmap;
- return iommu_ops->map(domain, iova, paddr, gfp_order, prot);
+ /* make sure we're still sane */
+ BUG_ON(!pgsize);
+
+ /* pick the biggest page */
+ pgsize_idx = __fls(pgsize);
+ pgsize = 1UL << pgsize_idx;
+
+ pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova,
+ (unsigned long)paddr, pgsize);
+
+ ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
+ if (ret)
+ break;
+
+ iova += pgsize;
+ paddr += pgsize;
+ size -= pgsize;
+ }
+
+ /* unroll mapping in case something went wrong */
+ if (ret)
+ iommu_unmap(domain, orig_iova, orig_size - size);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(iommu_map);
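A worked illustration of the splitting loop above, assuming a driver that advertises the SZ_4K | SZ_64K | SZ_1M | SZ_16M bitmap used by the msm and omap drivers later in this patch (the addresses are made up):

/*
 * iommu_map(domain, 0x20000000, 0x81000000, SZ_1M + SZ_64K, prot):
 *
 *   pass 1: size = 0x110000, iova|paddr is 16M-aligned, so the largest
 *           supported size that still fits is SZ_1M
 *           -> ops->map(domain, 0x20000000, 0x81000000, SZ_1M, prot)
 *   pass 2: size = 0x10000, the advanced addresses are 1M-aligned,
 *           largest fitting size is SZ_64K
 *           -> ops->map(domain, 0x20100000, 0x81100000, SZ_64K, prot)
 *
 * If any pass fails, the already-mapped prefix is torn down again via
 * iommu_unmap(domain, orig_iova, orig_size - size).
 */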
-int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
+size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
{
- unsigned long invalid_mask;
- size_t size;
+ size_t unmapped_page, unmapped = 0;
+ unsigned int min_pagesz;
+
+ if (unlikely(domain->ops->unmap == NULL))
+ return -ENODEV;
+
+ /* find out the minimum page size supported */
+ min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
- size = 0x1000UL << gfp_order;
- invalid_mask = size - 1;
+ /*
+ * The virtual address, as well as the size of the mapping, must be
+ * aligned (at least) to the size of the smallest page supported
+ * by the hardware
+ */
+ if (!IS_ALIGNED(iova | size, min_pagesz)) {
+ pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n",
+ iova, (unsigned long)size, min_pagesz);
+ return -EINVAL;
+ }
- BUG_ON(iova & invalid_mask);
+ pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova,
+ (unsigned long)size);
- return iommu_ops->unmap(domain, iova, gfp_order);
+ /*
+ * Keep iterating until we either unmap 'size' bytes (or more)
+ * or we hit an area that isn't mapped.
+ */
+ while (unmapped < size) {
+ size_t left = size - unmapped;
+
+ unmapped_page = domain->ops->unmap(domain, iova, left);
+ if (!unmapped_page)
+ break;
+
+ pr_debug("unmapped: iova 0x%lx size %lx\n", iova,
+ (unsigned long)unmapped_page);
+
+ iova += unmapped_page;
+ unmapped += unmapped_page;
+ }
+
+ return unmapped;
}
EXPORT_SYMBOL_GPL(iommu_unmap);
+
+int iommu_device_group(struct device *dev, unsigned int *groupid)
+{
+ if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group)
+ return dev->bus->iommu_ops->device_group(dev, groupid);
+
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(iommu_device_group);
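Putting the reworked iommu.c interface together from a client's point of view, a hedged fragment (dev, iova and paddr are assumed to be in scope; error handling trimmed):

	struct iommu_domain *domain;
	int ret;

	if (!iommu_present(&platform_bus_type))		/* was: iommu_found() */
		return -ENODEV;

	domain = iommu_domain_alloc(&platform_bus_type);	/* was: no argument */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);
	if (ret)
		goto err_free;

	/* sizes are now plain bytes instead of get_order()-style page orders */
	ret = iommu_map(domain, iova, paddr, SZ_64K, IOMMU_READ | IOMMU_WRITE);
	...
	iommu_unmap(domain, iova, SZ_64K);	/* returns the bytes actually unmapped */
	iommu_detach_device(domain, dev);
err_free:
	iommu_domain_free(domain);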
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 1a584e077c61..08a90b88e40d 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -42,6 +42,9 @@ __asm__ __volatile__ ( \
#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0)
#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1)
+/* bitmap of the page sizes currently supported */
+#define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
+
static int msm_iommu_tex_class[4];
DEFINE_SPINLOCK(msm_iommu_lock);
@@ -352,7 +355,7 @@ fail:
}
static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
- phys_addr_t pa, int order, int prot)
+ phys_addr_t pa, size_t len, int prot)
{
struct msm_priv *priv;
unsigned long flags;
@@ -363,7 +366,6 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
unsigned long *sl_pte;
unsigned long sl_offset;
unsigned int pgprot;
- size_t len = 0x1000UL << order;
int ret = 0, tex, sh;
spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -463,8 +465,8 @@ fail:
return ret;
}
-static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
- int order)
+static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
+ size_t len)
{
struct msm_priv *priv;
unsigned long flags;
@@ -474,7 +476,6 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
unsigned long *sl_table;
unsigned long *sl_pte;
unsigned long sl_offset;
- size_t len = 0x1000UL << order;
int i, ret = 0;
spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -543,9 +544,13 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
}
ret = __flush_iotlb(domain);
+
fail:
spin_unlock_irqrestore(&msm_iommu_lock, flags);
- return ret;
+
+ /* the IOMMU API requires us to return how many bytes were unmapped */
+ len = ret ? 0 : len;
+ return len;
}
static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -677,7 +682,8 @@ static struct iommu_ops msm_iommu_ops = {
.map = msm_iommu_map,
.unmap = msm_iommu_unmap,
.iova_to_phys = msm_iommu_iova_to_phys,
- .domain_has_cap = msm_iommu_domain_has_cap
+ .domain_has_cap = msm_iommu_domain_has_cap,
+ .pgsize_bitmap = MSM_IOMMU_PGSIZES,
};
static int __init get_tex_class(int icp, int ocp, int mt, int nos)
@@ -721,7 +727,7 @@ static void __init setup_iommu_tex_classes(void)
static int __init msm_iommu_init(void)
{
setup_iommu_tex_classes();
- register_iommu(&msm_iommu_ops);
+ bus_set_iommu(&platform_bus_type, &msm_iommu_ops);
return 0;
}
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
new file mode 100644
index 000000000000..288da5c1499d
--- /dev/null
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -0,0 +1,419 @@
+/*
+ * omap iommu: debugfs interface
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/platform_device.h>
+#include <linux/debugfs.h>
+
+#include <plat/iommu.h>
+#include <plat/iovmm.h>
+
+#include <plat/iopgtable.h>
+
+#define MAXCOLUMN 100 /* for short messages */
+
+static DEFINE_MUTEX(iommu_debug_lock);
+
+static struct dentry *iommu_debug_root;
+
+static ssize_t debug_read_ver(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ u32 ver = omap_iommu_arch_version();
+ char buf[MAXCOLUMN], *p = buf;
+
+ p += sprintf(p, "H/W version: %d.%d\n", (ver >> 4) & 0xf, ver & 0xf);
+
+ return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+}
+
+static ssize_t debug_read_regs(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct omap_iommu *obj = file->private_data;
+ char *p, *buf;
+ ssize_t bytes;
+
+ buf = kmalloc(count, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = buf;
+
+ mutex_lock(&iommu_debug_lock);
+
+ bytes = omap_iommu_dump_ctx(obj, p, count);
+ bytes = simple_read_from_buffer(userbuf, count, ppos, buf, bytes);
+
+ mutex_unlock(&iommu_debug_lock);
+ kfree(buf);
+
+ return bytes;
+}
+
+static ssize_t debug_read_tlb(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct omap_iommu *obj = file->private_data;
+ char *p, *buf;
+ ssize_t bytes, rest;
+
+ buf = kmalloc(count, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = buf;
+
+ mutex_lock(&iommu_debug_lock);
+
+ p += sprintf(p, "%8s %8s\n", "cam:", "ram:");
+ p += sprintf(p, "-----------------------------------------\n");
+ rest = count - (p - buf);
+ p += omap_dump_tlb_entries(obj, p, rest);
+
+ bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+
+ mutex_unlock(&iommu_debug_lock);
+ kfree(buf);
+
+ return bytes;
+}
+
+static ssize_t debug_write_pagetable(struct file *file,
+ const char __user *userbuf, size_t count, loff_t *ppos)
+{
+ struct iotlb_entry e;
+ struct cr_regs cr;
+ int err;
+ struct omap_iommu *obj = file->private_data;
+ char buf[MAXCOLUMN], *p = buf;
+
+ count = min(count, sizeof(buf) - 1);
+
+ mutex_lock(&iommu_debug_lock);
+ if (copy_from_user(p, userbuf, count)) {
+ mutex_unlock(&iommu_debug_lock);
+ return -EFAULT;
+ }
+ p[count] = '\0';
+
+ sscanf(p, "%x %x", &cr.cam, &cr.ram);
+ if (!cr.cam || !cr.ram) {
+ mutex_unlock(&iommu_debug_lock);
+ return -EINVAL;
+ }
+
+ omap_iotlb_cr_to_e(&cr, &e);
+ err = omap_iopgtable_store_entry(obj, &e);
+ if (err)
+ dev_err(obj->dev, "%s: failed to store cr\n", __func__);
+
+ mutex_unlock(&iommu_debug_lock);
+ return count;
+}
+
+#define dump_ioptable_entry_one(lv, da, val) \
+ ({ \
+ int __err = 0; \
+ ssize_t bytes; \
+ const int maxcol = 22; \
+ const char *str = "%d: %08x %08x\n"; \
+ bytes = snprintf(p, maxcol, str, lv, da, val); \
+ p += bytes; \
+ len -= bytes; \
+ if (len < maxcol) \
+ __err = -ENOMEM; \
+ __err; \
+ })
+
+static ssize_t dump_ioptable(struct omap_iommu *obj, char *buf, ssize_t len)
+{
+ int i;
+ u32 *iopgd;
+ char *p = buf;
+
+ spin_lock(&obj->page_table_lock);
+
+ iopgd = iopgd_offset(obj, 0);
+ for (i = 0; i < PTRS_PER_IOPGD; i++, iopgd++) {
+ int j, err;
+ u32 *iopte;
+ u32 da;
+
+ if (!*iopgd)
+ continue;
+
+ if (!(*iopgd & IOPGD_TABLE)) {
+ da = i << IOPGD_SHIFT;
+
+ err = dump_ioptable_entry_one(1, da, *iopgd);
+ if (err)
+ goto out;
+ continue;
+ }
+
+ iopte = iopte_offset(iopgd, 0);
+
+ for (j = 0; j < PTRS_PER_IOPTE; j++, iopte++) {
+ if (!*iopte)
+ continue;
+
+ da = (i << IOPGD_SHIFT) + (j << IOPTE_SHIFT);
+ err = dump_ioptable_entry_one(2, da, *iopte);
+ if (err)
+ goto out;
+ }
+ }
+out:
+ spin_unlock(&obj->page_table_lock);
+
+ return p - buf;
+}
+
+static ssize_t debug_read_pagetable(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct omap_iommu *obj = file->private_data;
+ char *p, *buf;
+ size_t bytes;
+
+ buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = buf;
+
+ p += sprintf(p, "L: %8s %8s\n", "da:", "pa:");
+ p += sprintf(p, "-----------------------------------------\n");
+
+ mutex_lock(&iommu_debug_lock);
+
+ bytes = PAGE_SIZE - (p - buf);
+ p += dump_ioptable(obj, p, bytes);
+
+ bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+
+ mutex_unlock(&iommu_debug_lock);
+ free_page((unsigned long)buf);
+
+ return bytes;
+}
+
+static ssize_t debug_read_mmap(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct omap_iommu *obj = file->private_data;
+ char *p, *buf;
+ struct iovm_struct *tmp;
+ int i = 0;
+ ssize_t bytes;
+
+ buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = buf;
+
+ p += sprintf(p, "%-3s %-8s %-8s %6s %8s\n",
+ "No", "start", "end", "size", "flags");
+ p += sprintf(p, "-------------------------------------------------\n");
+
+ mutex_lock(&iommu_debug_lock);
+
+ list_for_each_entry(tmp, &obj->mmap, list) {
+ size_t len;
+ const char *str = "%3d %08x-%08x %6x %8x\n";
+ const int maxcol = 39;
+
+ len = tmp->da_end - tmp->da_start;
+ p += snprintf(p, maxcol, str,
+ i, tmp->da_start, tmp->da_end, len, tmp->flags);
+
+ if (PAGE_SIZE - (p - buf) < maxcol)
+ break;
+ i++;
+ }
+
+ bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+
+ mutex_unlock(&iommu_debug_lock);
+ free_page((unsigned long)buf);
+
+ return bytes;
+}
+
+static ssize_t debug_read_mem(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct omap_iommu *obj = file->private_data;
+ char *p, *buf;
+ struct iovm_struct *area;
+ ssize_t bytes;
+
+ count = min_t(ssize_t, count, PAGE_SIZE);
+
+ buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = buf;
+
+ mutex_lock(&iommu_debug_lock);
+
+ area = omap_find_iovm_area(obj, (u32)ppos);
+ if (IS_ERR(area)) {
+ bytes = -EINVAL;
+ goto err_out;
+ }
+ memcpy(p, area->va, count);
+ p += count;
+
+ bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+err_out:
+ mutex_unlock(&iommu_debug_lock);
+ free_page((unsigned long)buf);
+
+ return bytes;
+}
+
+static ssize_t debug_write_mem(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct omap_iommu *obj = file->private_data;
+ struct iovm_struct *area;
+ char *p, *buf;
+
+ count = min_t(size_t, count, PAGE_SIZE);
+
+ buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = buf;
+
+ mutex_lock(&iommu_debug_lock);
+
+ if (copy_from_user(p, userbuf, count)) {
+ count = -EFAULT;
+ goto err_out;
+ }
+
+ area = omap_find_iovm_area(obj, (u32)ppos);
+ if (IS_ERR(area)) {
+ count = -EINVAL;
+ goto err_out;
+ }
+ memcpy(area->va, p, count);
+err_out:
+ mutex_unlock(&iommu_debug_lock);
+ free_page((unsigned long)buf);
+
+ return count;
+}
+
+static int debug_open_generic(struct inode *inode, struct file *file)
+{
+ file->private_data = inode->i_private;
+ return 0;
+}
+
+#define DEBUG_FOPS(name) \
+ static const struct file_operations debug_##name##_fops = { \
+ .open = debug_open_generic, \
+ .read = debug_read_##name, \
+ .write = debug_write_##name, \
+ .llseek = generic_file_llseek, \
+ };
+
+#define DEBUG_FOPS_RO(name) \
+ static const struct file_operations debug_##name##_fops = { \
+ .open = debug_open_generic, \
+ .read = debug_read_##name, \
+ .llseek = generic_file_llseek, \
+ };
+
+DEBUG_FOPS_RO(ver);
+DEBUG_FOPS_RO(regs);
+DEBUG_FOPS_RO(tlb);
+DEBUG_FOPS(pagetable);
+DEBUG_FOPS_RO(mmap);
+DEBUG_FOPS(mem);
+
+#define __DEBUG_ADD_FILE(attr, mode) \
+ { \
+ struct dentry *dent; \
+ dent = debugfs_create_file(#attr, mode, parent, \
+ obj, &debug_##attr##_fops); \
+ if (!dent) \
+ return -ENOMEM; \
+ }
+
+#define DEBUG_ADD_FILE(name) __DEBUG_ADD_FILE(name, 0600)
+#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 0400)
+
+static int iommu_debug_register(struct device *dev, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct omap_iommu *obj = platform_get_drvdata(pdev);
+ struct dentry *d, *parent;
+
+ if (!obj || !obj->dev)
+ return -EINVAL;
+
+ d = debugfs_create_dir(obj->name, iommu_debug_root);
+ if (!d)
+ return -ENOMEM;
+ parent = d;
+
+ d = debugfs_create_u8("nr_tlb_entries", 0400, parent,
+ (u8 *)&obj->nr_tlb_entries);
+ if (!d)
+ return -ENOMEM;
+
+ DEBUG_ADD_FILE_RO(ver);
+ DEBUG_ADD_FILE_RO(regs);
+ DEBUG_ADD_FILE_RO(tlb);
+ DEBUG_ADD_FILE(pagetable);
+ DEBUG_ADD_FILE_RO(mmap);
+ DEBUG_ADD_FILE(mem);
+
+ return 0;
+}
+
+static int __init iommu_debug_init(void)
+{
+ struct dentry *d;
+ int err;
+
+ d = debugfs_create_dir("iommu", NULL);
+ if (!d)
+ return -ENOMEM;
+ iommu_debug_root = d;
+
+ err = omap_foreach_iommu_device(d, iommu_debug_register);
+ if (err)
+ goto err_out;
+ return 0;
+
+err_out:
+ debugfs_remove_recursive(iommu_debug_root);
+ return err;
+}
+module_init(iommu_debug_init)
+
+static void __exit iommu_debugfs_exit(void)
+{
+ debugfs_remove_recursive(iommu_debug_root);
+}
+module_exit(iommu_debugfs_exit)
+
+MODULE_DESCRIPTION("omap iommu: debugfs interface");
+MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>");
+MODULE_LICENSE("GPL v2");
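The files registered above land in a per-iommu debugfs directory; assuming debugfs is mounted in its usual location, the resulting layout looks roughly like this:

/*
 *   /sys/kernel/debug/iommu/<obj->name>/nr_tlb_entries
 *   /sys/kernel/debug/iommu/<obj->name>/{ver,regs,tlb,mmap}   read-only
 *   /sys/kernel/debug/iommu/<obj->name>/{pagetable,mem}       read-write
 */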
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
new file mode 100644
index 000000000000..d8edd979d01b
--- /dev/null
+++ b/drivers/iommu/omap-iommu.c
@@ -0,0 +1,1239 @@
+/*
+ * omap iommu: tlb and pagetable primitives
+ *
+ * Copyright (C) 2008-2010 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
+ * Paul Mundt and Toshihiro Kobayashi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/iommu.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+
+#include <asm/cacheflush.h>
+
+#include <plat/iommu.h>
+
+#include <plat/iopgtable.h>
+
+#define for_each_iotlb_cr(obj, n, __i, cr) \
+ for (__i = 0; \
+ (__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true); \
+ __i++)
+
+/* bitmap of the page sizes currently supported */
+#define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
+
+/**
+ * struct omap_iommu_domain - omap iommu domain
+ * @pgtable: the page table
+ * @iommu_dev: an omap iommu device attached to this domain. only a single
+ * iommu device can be attached for now.
+ * @lock: domain lock, should be taken when attaching/detaching
+ */
+struct omap_iommu_domain {
+ u32 *pgtable;
+ struct omap_iommu *iommu_dev;
+ spinlock_t lock;
+};
+
+/* accommodate the difference between omap1 and omap2/3 */
+static const struct iommu_functions *arch_iommu;
+
+static struct platform_driver omap_iommu_driver;
+static struct kmem_cache *iopte_cachep;
+
+/**
+ * omap_install_iommu_arch - Install architecture specific iommu functions
+ * @ops: a pointer to architecture specific iommu functions
+ *
+ * There are several kinds of iommu algorithms (tlb, pagetable) among
+ * the omap series. This interface installs such an iommu algorithm.
+ **/
+int omap_install_iommu_arch(const struct iommu_functions *ops)
+{
+ if (arch_iommu)
+ return -EBUSY;
+
+ arch_iommu = ops;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(omap_install_iommu_arch);
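A hedged sketch of what an architecture backend hands to omap_install_iommu_arch(); only callbacks that this file actually dereferences through arch_iommu are listed, and the omap2_* handler names are placeholders:

static const struct iommu_functions my_omap2_iommu_ops = {
	.version	= 2,
	.enable		= omap2_iommu_enable,
	.disable	= omap2_iommu_disable,
	.fault_isr	= omap2_iommu_fault_isr,
	.cr_to_e	= omap2_cr_to_e,
	.cr_valid	= omap2_cr_valid,
	.get_pte_attr	= omap2_get_pte_attr,
	/* plus tlb_{read,load}_cr, alloc_cr, cr_to_virt, dump_cr,
	 * dump_ctx and save/restore_ctx, all used further down */
};

static int __init my_backend_init(void)
{
	return omap_install_iommu_arch(&my_omap2_iommu_ops);
}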
+
+/**
+ * omap_uninstall_iommu_arch - Uninstall architecture specific iommu functions
+ * @ops: a pointer to architecture specific iommu functions
+ *
+ * This interface uninstalls the iommu algorithm installed previously.
+ **/
+void omap_uninstall_iommu_arch(const struct iommu_functions *ops)
+{
+ if (arch_iommu != ops)
+ pr_err("%s: not your arch\n", __func__);
+
+ arch_iommu = NULL;
+}
+EXPORT_SYMBOL_GPL(omap_uninstall_iommu_arch);
+
+/**
+ * omap_iommu_save_ctx - Save registers for pm off-mode support
+ * @dev: client device
+ **/
+void omap_iommu_save_ctx(struct device *dev)
+{
+ struct omap_iommu *obj = dev_to_omap_iommu(dev);
+
+ arch_iommu->save_ctx(obj);
+}
+EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
+
+/**
+ * omap_iommu_restore_ctx - Restore registers for pm off-mode support
+ * @dev: client device
+ **/
+void omap_iommu_restore_ctx(struct device *dev)
+{
+ struct omap_iommu *obj = dev_to_omap_iommu(dev);
+
+ arch_iommu->restore_ctx(obj);
+}
+EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
+
+/**
+ * omap_iommu_arch_version - Return running iommu arch version
+ **/
+u32 omap_iommu_arch_version(void)
+{
+ return arch_iommu->version;
+}
+EXPORT_SYMBOL_GPL(omap_iommu_arch_version);
+
+static int iommu_enable(struct omap_iommu *obj)
+{
+ int err;
+
+ if (!obj)
+ return -EINVAL;
+
+ if (!arch_iommu)
+ return -ENODEV;
+
+ clk_enable(obj->clk);
+
+ err = arch_iommu->enable(obj);
+
+ clk_disable(obj->clk);
+ return err;
+}
+
+static void iommu_disable(struct omap_iommu *obj)
+{
+ if (!obj)
+ return;
+
+ clk_enable(obj->clk);
+
+ arch_iommu->disable(obj);
+
+ clk_disable(obj->clk);
+}
+
+/*
+ * TLB operations
+ */
+void omap_iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e)
+{
+ BUG_ON(!cr || !e);
+
+ arch_iommu->cr_to_e(cr, e);
+}
+EXPORT_SYMBOL_GPL(omap_iotlb_cr_to_e);
+
+static inline int iotlb_cr_valid(struct cr_regs *cr)
+{
+ if (!cr)
+ return -EINVAL;
+
+ return arch_iommu->cr_valid(cr);
+}
+
+static inline struct cr_regs *iotlb_alloc_cr(struct omap_iommu *obj,
+ struct iotlb_entry *e)
+{
+ if (!e)
+ return NULL;
+
+ return arch_iommu->alloc_cr(obj, e);
+}
+
+static u32 iotlb_cr_to_virt(struct cr_regs *cr)
+{
+ return arch_iommu->cr_to_virt(cr);
+}
+
+static u32 get_iopte_attr(struct iotlb_entry *e)
+{
+ return arch_iommu->get_pte_attr(e);
+}
+
+static u32 iommu_report_fault(struct omap_iommu *obj, u32 *da)
+{
+ return arch_iommu->fault_isr(obj, da);
+}
+
+static void iotlb_lock_get(struct omap_iommu *obj, struct iotlb_lock *l)
+{
+ u32 val;
+
+ val = iommu_read_reg(obj, MMU_LOCK);
+
+ l->base = MMU_LOCK_BASE(val);
+ l->vict = MMU_LOCK_VICT(val);
+
+}
+
+static void iotlb_lock_set(struct omap_iommu *obj, struct iotlb_lock *l)
+{
+ u32 val;
+
+ val = (l->base << MMU_LOCK_BASE_SHIFT);
+ val |= (l->vict << MMU_LOCK_VICT_SHIFT);
+
+ iommu_write_reg(obj, val, MMU_LOCK);
+}
+
+static void iotlb_read_cr(struct omap_iommu *obj, struct cr_regs *cr)
+{
+ arch_iommu->tlb_read_cr(obj, cr);
+}
+
+static void iotlb_load_cr(struct omap_iommu *obj, struct cr_regs *cr)
+{
+ arch_iommu->tlb_load_cr(obj, cr);
+
+ iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
+ iommu_write_reg(obj, 1, MMU_LD_TLB);
+}
+
+/**
+ * iotlb_dump_cr - Dump an iommu tlb entry into buf
+ * @obj: target iommu
+ * @cr: contents of cam and ram register
+ * @buf: output buffer
+ **/
+static inline ssize_t iotlb_dump_cr(struct omap_iommu *obj, struct cr_regs *cr,
+ char *buf)
+{
+ BUG_ON(!cr || !buf);
+
+ return arch_iommu->dump_cr(obj, cr, buf);
+}
+
+/* only used in iotlb iteration for-loop */
+static struct cr_regs __iotlb_read_cr(struct omap_iommu *obj, int n)
+{
+ struct cr_regs cr;
+ struct iotlb_lock l;
+
+ iotlb_lock_get(obj, &l);
+ l.vict = n;
+ iotlb_lock_set(obj, &l);
+ iotlb_read_cr(obj, &cr);
+
+ return cr;
+}
+
+/**
+ * load_iotlb_entry - Set an iommu tlb entry
+ * @obj: target iommu
+ * @e: an iommu tlb entry info
+ **/
+#ifdef PREFETCH_IOTLB
+static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
+{
+ int err = 0;
+ struct iotlb_lock l;
+ struct cr_regs *cr;
+
+ if (!obj || !obj->nr_tlb_entries || !e)
+ return -EINVAL;
+
+ clk_enable(obj->clk);
+
+ iotlb_lock_get(obj, &l);
+ if (l.base == obj->nr_tlb_entries) {
+ dev_warn(obj->dev, "%s: preserve entries full\n", __func__);
+ err = -EBUSY;
+ goto out;
+ }
+ if (!e->prsvd) {
+ int i;
+ struct cr_regs tmp;
+
+ for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, tmp)
+ if (!iotlb_cr_valid(&tmp))
+ break;
+
+ if (i == obj->nr_tlb_entries) {
+ dev_dbg(obj->dev, "%s: full: no entry\n", __func__);
+ err = -EBUSY;
+ goto out;
+ }
+
+ iotlb_lock_get(obj, &l);
+ } else {
+ l.vict = l.base;
+ iotlb_lock_set(obj, &l);
+ }
+
+ cr = iotlb_alloc_cr(obj, e);
+ if (IS_ERR(cr)) {
+ clk_disable(obj->clk);
+ return PTR_ERR(cr);
+ }
+
+ iotlb_load_cr(obj, cr);
+ kfree(cr);
+
+ if (e->prsvd)
+ l.base++;
+ /* increment victim for next tlb load */
+ if (++l.vict == obj->nr_tlb_entries)
+ l.vict = l.base;
+ iotlb_lock_set(obj, &l);
+out:
+ clk_disable(obj->clk);
+ return err;
+}
+
+#else /* !PREFETCH_IOTLB */
+
+static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
+{
+ return 0;
+}
+
+#endif /* !PREFETCH_IOTLB */
+
+static int prefetch_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
+{
+ return load_iotlb_entry(obj, e);
+}
+
+/**
+ * flush_iotlb_page - Clear an iommu tlb entry
+ * @obj: target iommu
+ * @da: iommu device virtual address
+ *
+ * Clear an iommu tlb entry which includes 'da' address.
+ **/
+static void flush_iotlb_page(struct omap_iommu *obj, u32 da)
+{
+ int i;
+ struct cr_regs cr;
+
+ clk_enable(obj->clk);
+
+ for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) {
+ u32 start;
+ size_t bytes;
+
+ if (!iotlb_cr_valid(&cr))
+ continue;
+
+ start = iotlb_cr_to_virt(&cr);
+ bytes = iopgsz_to_bytes(cr.cam & 3);
+
+ if ((start <= da) && (da < start + bytes)) {
+ dev_dbg(obj->dev, "%s: %08x<=%08x(%x)\n",
+ __func__, start, da, bytes);
+ iotlb_load_cr(obj, &cr);
+ iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
+ }
+ }
+ clk_disable(obj->clk);
+
+ if (i == obj->nr_tlb_entries)
+ dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da);
+}
+
+/**
+ * flush_iotlb_all - Clear all iommu tlb entries
+ * @obj: target iommu
+ **/
+static void flush_iotlb_all(struct omap_iommu *obj)
+{
+ struct iotlb_lock l;
+
+ clk_enable(obj->clk);
+
+ l.base = 0;
+ l.vict = 0;
+ iotlb_lock_set(obj, &l);
+
+ iommu_write_reg(obj, 1, MMU_GFLUSH);
+
+ clk_disable(obj->clk);
+}
+
+#if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE)
+
+ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes)
+{
+ if (!obj || !buf)
+ return -EINVAL;
+
+ clk_enable(obj->clk);
+
+ bytes = arch_iommu->dump_ctx(obj, buf, bytes);
+
+ clk_disable(obj->clk);
+
+ return bytes;
+}
+EXPORT_SYMBOL_GPL(omap_iommu_dump_ctx);
+
+static int
+__dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num)
+{
+ int i;
+ struct iotlb_lock saved;
+ struct cr_regs tmp;
+ struct cr_regs *p = crs;
+
+ clk_enable(obj->clk);
+ iotlb_lock_get(obj, &saved);
+
+ for_each_iotlb_cr(obj, num, i, tmp) {
+ if (!iotlb_cr_valid(&tmp))
+ continue;
+ *p++ = tmp;
+ }
+
+ iotlb_lock_set(obj, &saved);
+ clk_disable(obj->clk);
+
+ return p - crs;
+}
+
+/**
+ * omap_dump_tlb_entries - dump cr arrays to given buffer
+ * @obj: target iommu
+ * @buf: output buffer
+ **/
+size_t omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t bytes)
+{
+ int i, num;
+ struct cr_regs *cr;
+ char *p = buf;
+
+ num = bytes / sizeof(*cr);
+ num = min(obj->nr_tlb_entries, num);
+
+ cr = kcalloc(num, sizeof(*cr), GFP_KERNEL);
+ if (!cr)
+ return 0;
+
+ num = __dump_tlb_entries(obj, cr, num);
+ for (i = 0; i < num; i++)
+ p += iotlb_dump_cr(obj, cr + i, p);
+ kfree(cr);
+
+ return p - buf;
+}
+EXPORT_SYMBOL_GPL(omap_dump_tlb_entries);
+
+int omap_foreach_iommu_device(void *data, int (*fn)(struct device *, void *))
+{
+ return driver_for_each_device(&omap_iommu_driver.driver,
+ NULL, data, fn);
+}
+EXPORT_SYMBOL_GPL(omap_foreach_iommu_device);
+
+#endif /* CONFIG_OMAP_IOMMU_DEBUG || CONFIG_OMAP_IOMMU_DEBUG_MODULE */
+
+/*
+ * H/W pagetable operations
+ */
+static void flush_iopgd_range(u32 *first, u32 *last)
+{
+ /* FIXME: L2 cache should be taken care of if it exists */
+ do {
+ asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pgd"
+ : : "r" (first));
+ first += L1_CACHE_BYTES / sizeof(*first);
+ } while (first <= last);
+}
+
+static void flush_iopte_range(u32 *first, u32 *last)
+{
+ /* FIXME: L2 cache should be taken care of if it exists */
+ do {
+ asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pte"
+ : : "r" (first));
+ first += L1_CACHE_BYTES / sizeof(*first);
+ } while (first <= last);
+}
+
+static void iopte_free(u32 *iopte)
+{
+ /* Note: freed iopte's must be clean ready for re-use */
+ kmem_cache_free(iopte_cachep, iopte);
+}
+
+static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da)
+{
+ u32 *iopte;
+
+ /* a table already exists */
+ if (*iopgd)
+ goto pte_ready;
+
+ /*
+ * do the allocation outside the page table lock
+ */
+ spin_unlock(&obj->page_table_lock);
+ iopte = kmem_cache_zalloc(iopte_cachep, GFP_KERNEL);
+ spin_lock(&obj->page_table_lock);
+
+ if (!*iopgd) {
+ if (!iopte)
+ return ERR_PTR(-ENOMEM);
+
+ *iopgd = virt_to_phys(iopte) | IOPGD_TABLE;
+ flush_iopgd_range(iopgd, iopgd);
+
+ dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte);
+ } else {
+ /* We raced, free the redundant table */
+ iopte_free(iopte);
+ }
+
+pte_ready:
+ iopte = iopte_offset(iopgd, da);
+
+ dev_vdbg(obj->dev,
+ "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n",
+ __func__, da, iopgd, *iopgd, iopte, *iopte);
+
+ return iopte;
+}
+
+static int iopgd_alloc_section(struct omap_iommu *obj, u32 da, u32 pa, u32 prot)
+{
+ u32 *iopgd = iopgd_offset(obj, da);
+
+ if ((da | pa) & ~IOSECTION_MASK) {
+ dev_err(obj->dev, "%s: %08x:%08x should be aligned on %08lx\n",
+ __func__, da, pa, IOSECTION_SIZE);
+ return -EINVAL;
+ }
+
+ *iopgd = (pa & IOSECTION_MASK) | prot | IOPGD_SECTION;
+ flush_iopgd_range(iopgd, iopgd);
+ return 0;
+}
+
+static int iopgd_alloc_super(struct omap_iommu *obj, u32 da, u32 pa, u32 prot)
+{
+ u32 *iopgd = iopgd_offset(obj, da);
+ int i;
+
+ if ((da | pa) & ~IOSUPER_MASK) {
+ dev_err(obj->dev, "%s: %08x:%08x should be aligned on %08lx\n",
+ __func__, da, pa, IOSUPER_SIZE);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 16; i++)
+ *(iopgd + i) = (pa & IOSUPER_MASK) | prot | IOPGD_SUPER;
+ flush_iopgd_range(iopgd, iopgd + 15);
+ return 0;
+}
+
+static int iopte_alloc_page(struct omap_iommu *obj, u32 da, u32 pa, u32 prot)
+{
+ u32 *iopgd = iopgd_offset(obj, da);
+ u32 *iopte = iopte_alloc(obj, iopgd, da);
+
+ if (IS_ERR(iopte))
+ return PTR_ERR(iopte);
+
+ *iopte = (pa & IOPAGE_MASK) | prot | IOPTE_SMALL;
+ flush_iopte_range(iopte, iopte);
+
+ dev_vdbg(obj->dev, "%s: da:%08x pa:%08x pte:%p *pte:%08x\n",
+ __func__, da, pa, iopte, *iopte);
+
+ return 0;
+}
+
+static int iopte_alloc_large(struct omap_iommu *obj, u32 da, u32 pa, u32 prot)
+{
+ u32 *iopgd = iopgd_offset(obj, da);
+ u32 *iopte = iopte_alloc(obj, iopgd, da);
+ int i;
+
+ if ((da | pa) & ~IOLARGE_MASK) {
+ dev_err(obj->dev, "%s: %08x:%08x should be aligned on %08lx\n",
+ __func__, da, pa, IOLARGE_SIZE);
+ return -EINVAL;
+ }
+
+ if (IS_ERR(iopte))
+ return PTR_ERR(iopte);
+
+ for (i = 0; i < 16; i++)
+ *(iopte + i) = (pa & IOLARGE_MASK) | prot | IOPTE_LARGE;
+ flush_iopte_range(iopte, iopte + 15);
+ return 0;
+}
+
+static int
+iopgtable_store_entry_core(struct omap_iommu *obj, struct iotlb_entry *e)
+{
+ int (*fn)(struct omap_iommu *, u32, u32, u32);
+ u32 prot;
+ int err;
+
+ if (!obj || !e)
+ return -EINVAL;
+
+ switch (e->pgsz) {
+ case MMU_CAM_PGSZ_16M:
+ fn = iopgd_alloc_super;
+ break;
+ case MMU_CAM_PGSZ_1M:
+ fn = iopgd_alloc_section;
+ break;
+ case MMU_CAM_PGSZ_64K:
+ fn = iopte_alloc_large;
+ break;
+ case MMU_CAM_PGSZ_4K:
+ fn = iopte_alloc_page;
+ break;
+ default:
+ fn = NULL;
+ BUG();
+ break;
+ }
+
+ prot = get_iopte_attr(e);
+
+ spin_lock(&obj->page_table_lock);
+ err = fn(obj, e->da, e->pa, prot);
+ spin_unlock(&obj->page_table_lock);
+
+ return err;
+}
+
+/**
+ * omap_iopgtable_store_entry - Make an iommu pte entry
+ * @obj: target iommu
+ * @e: an iommu tlb entry info
+ **/
+int omap_iopgtable_store_entry(struct omap_iommu *obj, struct iotlb_entry *e)
+{
+ int err;
+
+ flush_iotlb_page(obj, e->da);
+ err = iopgtable_store_entry_core(obj, e);
+ if (!err)
+ prefetch_iotlb_entry(obj, e);
+ return err;
+}
+EXPORT_SYMBOL_GPL(omap_iopgtable_store_entry);
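The typical caller pattern mirrors omap_iommu_map() further down in this file: convert a byte size to an omap page-size code, fold it into the entry flags, then store a single entry (da, pa, prot and obj are assumed to be in scope):

	struct iotlb_entry e;
	int pgsz, err;

	pgsz = bytes_to_iopgsz(SZ_1M);		/* -> MMU_CAM_PGSZ_1M */
	if (pgsz < 0)
		return -EINVAL;

	iotlb_init_entry(&e, da, pa, pgsz | prot);
	err = omap_iopgtable_store_entry(obj, &e);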
+
+/**
+ * iopgtable_lookup_entry - Lookup an iommu pte entry
+ * @obj: target iommu
+ * @da: iommu device virtual address
+ * @ppgd: iommu pgd entry pointer to be returned
+ * @ppte: iommu pte entry pointer to be returned
+ **/
+static void
+iopgtable_lookup_entry(struct omap_iommu *obj, u32 da, u32 **ppgd, u32 **ppte)
+{
+ u32 *iopgd, *iopte = NULL;
+
+ iopgd = iopgd_offset(obj, da);
+ if (!*iopgd)
+ goto out;
+
+ if (iopgd_is_table(*iopgd))
+ iopte = iopte_offset(iopgd, da);
+out:
+ *ppgd = iopgd;
+ *ppte = iopte;
+}
+
+static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da)
+{
+ size_t bytes;
+ u32 *iopgd = iopgd_offset(obj, da);
+ int nent = 1;
+
+ if (!*iopgd)
+ return 0;
+
+ if (iopgd_is_table(*iopgd)) {
+ int i;
+ u32 *iopte = iopte_offset(iopgd, da);
+
+ bytes = IOPTE_SIZE;
+ if (*iopte & IOPTE_LARGE) {
+ nent *= 16;
+ /* rewind to the 1st entry */
+ iopte = iopte_offset(iopgd, (da & IOLARGE_MASK));
+ }
+ bytes *= nent;
+ memset(iopte, 0, nent * sizeof(*iopte));
+ flush_iopte_range(iopte, iopte + (nent - 1) * sizeof(*iopte));
+
+ /*
+ * do table walk to check if this table is necessary or not
+ */
+ iopte = iopte_offset(iopgd, 0);
+ for (i = 0; i < PTRS_PER_IOPTE; i++)
+ if (iopte[i])
+ goto out;
+
+ iopte_free(iopte);
+ nent = 1; /* for the next L1 entry */
+ } else {
+ bytes = IOPGD_SIZE;
+ if ((*iopgd & IOPGD_SUPER) == IOPGD_SUPER) {
+ nent *= 16;
+ /* rewind to the 1st entry */
+ iopgd = iopgd_offset(obj, (da & IOSUPER_MASK));
+ }
+ bytes *= nent;
+ }
+ memset(iopgd, 0, nent * sizeof(*iopgd));
+ flush_iopgd_range(iopgd, iopgd + (nent - 1) * sizeof(*iopgd));
+out:
+ return bytes;
+}
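To make the size accounting above concrete, assuming the usual 4K IOPTE_SIZE and 1M IOPGD_SIZE of this page-table format:

/*
 * - small page:    one pte cleared,  returns IOPTE_SIZE (4K)
 * - large page:    rewinds to the first of its 16 ptes, clears all 16,
 *                  returns 16 * IOPTE_SIZE (64K)
 * - section:       one pgd slot cleared, returns IOPGD_SIZE (1M)
 * - supersection:  rewinds to the first of its 16 pgd slots, clears all
 *                  16, returns 16 * IOPGD_SIZE (16M)
 * A second-level table that becomes empty is freed as well.
 */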
+
+/**
+ * iopgtable_clear_entry - Remove an iommu pte entry
+ * @obj: target iommu
+ * @da: iommu device virtual address
+ **/
+static size_t iopgtable_clear_entry(struct omap_iommu *obj, u32 da)
+{
+ size_t bytes;
+
+ spin_lock(&obj->page_table_lock);
+
+ bytes = iopgtable_clear_entry_core(obj, da);
+ flush_iotlb_page(obj, da);
+
+ spin_unlock(&obj->page_table_lock);
+
+ return bytes;
+}
+
+static void iopgtable_clear_entry_all(struct omap_iommu *obj)
+{
+ int i;
+
+ spin_lock(&obj->page_table_lock);
+
+ for (i = 0; i < PTRS_PER_IOPGD; i++) {
+ u32 da;
+ u32 *iopgd;
+
+ da = i << IOPGD_SHIFT;
+ iopgd = iopgd_offset(obj, da);
+
+ if (!*iopgd)
+ continue;
+
+ if (iopgd_is_table(*iopgd))
+ iopte_free(iopte_offset(iopgd, 0));
+
+ *iopgd = 0;
+ flush_iopgd_range(iopgd, iopgd);
+ }
+
+ flush_iotlb_all(obj);
+
+ spin_unlock(&obj->page_table_lock);
+}
+
+/*
+ * Device IOMMU generic operations
+ */
+static irqreturn_t iommu_fault_handler(int irq, void *data)
+{
+ u32 da, errs;
+ u32 *iopgd, *iopte;
+ struct omap_iommu *obj = data;
+ struct iommu_domain *domain = obj->domain;
+
+ if (!obj->refcount)
+ return IRQ_NONE;
+
+ clk_enable(obj->clk);
+ errs = iommu_report_fault(obj, &da);
+ clk_disable(obj->clk);
+ if (errs == 0)
+ return IRQ_HANDLED;
+
+ /* Fault callback or TLB/PTE Dynamic loading */
+ if (!report_iommu_fault(domain, obj->dev, da, 0))
+ return IRQ_HANDLED;
+
+ iommu_disable(obj);
+
+ iopgd = iopgd_offset(obj, da);
+
+ if (!iopgd_is_table(*iopgd)) {
+ dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p "
+ "*pgd:0x%08x\n", obj->name, errs, da, iopgd, *iopgd);
+ return IRQ_NONE;
+ }
+
+ iopte = iopte_offset(iopgd, da);
+
+ dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p *pgd:0x%08x "
+ "pte:0x%p *pte:0x%08x\n", obj->name, errs, da, iopgd, *iopgd,
+ iopte, *iopte);
+
+ return IRQ_NONE;
+}
+
+static int device_match_by_alias(struct device *dev, void *data)
+{
+ struct omap_iommu *obj = to_iommu(dev);
+ const char *name = data;
+
+ pr_debug("%s: %s %s\n", __func__, obj->name, name);
+
+ return strcmp(obj->name, name) == 0;
+}
+
+/**
+ * omap_iommu_attach() - attach iommu device to an iommu domain
+ * @name: name of target omap iommu device
+ * @iopgd: page table
+ **/
+static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd)
+{
+ int err = -ENOMEM;
+ struct device *dev;
+ struct omap_iommu *obj;
+
+ dev = driver_find_device(&omap_iommu_driver.driver, NULL,
+ (void *)name,
+ device_match_by_alias);
+ if (!dev)
+ return NULL;
+
+ obj = to_iommu(dev);
+
+ spin_lock(&obj->iommu_lock);
+
+ /* an iommu device can only be attached once */
+ if (++obj->refcount > 1) {
+ dev_err(dev, "%s: already attached!\n", obj->name);
+ err = -EBUSY;
+ goto err_enable;
+ }
+
+ obj->iopgd = iopgd;
+ err = iommu_enable(obj);
+ if (err)
+ goto err_enable;
+ flush_iotlb_all(obj);
+
+ if (!try_module_get(obj->owner))
+ goto err_module;
+
+ spin_unlock(&obj->iommu_lock);
+
+ dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
+ return obj;
+
+err_module:
+ if (obj->refcount == 1)
+ iommu_disable(obj);
+err_enable:
+ obj->refcount--;
+ spin_unlock(&obj->iommu_lock);
+ return ERR_PTR(err);
+}
+
+/**
+ * omap_iommu_detach - release iommu device
+ * @obj: target iommu
+ **/
+static void omap_iommu_detach(struct omap_iommu *obj)
+{
+ if (!obj || IS_ERR(obj))
+ return;
+
+ spin_lock(&obj->iommu_lock);
+
+ if (--obj->refcount == 0)
+ iommu_disable(obj);
+
+ module_put(obj->owner);
+
+ obj->iopgd = NULL;
+
+ spin_unlock(&obj->iommu_lock);
+
+ dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
+}
+
+/*
+ * OMAP Device MMU(IOMMU) detection
+ */
+static int __devinit omap_iommu_probe(struct platform_device *pdev)
+{
+ int err = -ENODEV;
+ int irq;
+ struct omap_iommu *obj;
+ struct resource *res;
+ struct iommu_platform_data *pdata = pdev->dev.platform_data;
+
+ if (pdev->num_resources != 2)
+ return -EINVAL;
+
+ obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ obj->clk = clk_get(&pdev->dev, pdata->clk_name);
+ if (IS_ERR(obj->clk))
+ goto err_clk;
+
+ obj->nr_tlb_entries = pdata->nr_tlb_entries;
+ obj->name = pdata->name;
+ obj->dev = &pdev->dev;
+ obj->ctx = (void *)obj + sizeof(*obj);
+ obj->da_start = pdata->da_start;
+ obj->da_end = pdata->da_end;
+
+ spin_lock_init(&obj->iommu_lock);
+ mutex_init(&obj->mmap_lock);
+ spin_lock_init(&obj->page_table_lock);
+ INIT_LIST_HEAD(&obj->mmap);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ err = -ENODEV;
+ goto err_mem;
+ }
+
+ res = request_mem_region(res->start, resource_size(res),
+ dev_name(&pdev->dev));
+ if (!res) {
+ err = -EIO;
+ goto err_mem;
+ }
+
+ obj->regbase = ioremap(res->start, resource_size(res));
+ if (!obj->regbase) {
+ err = -ENOMEM;
+ goto err_ioremap;
+ }
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ err = -ENODEV;
+ goto err_irq;
+ }
+ err = request_irq(irq, iommu_fault_handler, IRQF_SHARED,
+ dev_name(&pdev->dev), obj);
+ if (err < 0)
+ goto err_irq;
+ platform_set_drvdata(pdev, obj);
+
+ dev_info(&pdev->dev, "%s registered\n", obj->name);
+ return 0;
+
+err_irq:
+ iounmap(obj->regbase);
+err_ioremap:
+ release_mem_region(res->start, resource_size(res));
+err_mem:
+ clk_put(obj->clk);
+err_clk:
+ kfree(obj);
+ return err;
+}
+
+static int __devexit omap_iommu_remove(struct platform_device *pdev)
+{
+ int irq;
+ struct resource *res;
+ struct omap_iommu *obj = platform_get_drvdata(pdev);
+
+ platform_set_drvdata(pdev, NULL);
+
+ iopgtable_clear_entry_all(obj);
+
+ irq = platform_get_irq(pdev, 0);
+ free_irq(irq, obj);
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ release_mem_region(res->start, resource_size(res));
+ iounmap(obj->regbase);
+
+ clk_put(obj->clk);
+ dev_info(&pdev->dev, "%s removed\n", obj->name);
+ kfree(obj);
+ return 0;
+}
+
+static struct platform_driver omap_iommu_driver = {
+ .probe = omap_iommu_probe,
+ .remove = __devexit_p(omap_iommu_remove),
+ .driver = {
+ .name = "omap-iommu",
+ },
+};
+
+static void iopte_cachep_ctor(void *iopte)
+{
+ clean_dcache_area(iopte, IOPTE_TABLE_SIZE);
+}
+
+static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
+ phys_addr_t pa, size_t bytes, int prot)
+{
+ struct omap_iommu_domain *omap_domain = domain->priv;
+ struct omap_iommu *oiommu = omap_domain->iommu_dev;
+ struct device *dev = oiommu->dev;
+ struct iotlb_entry e;
+ int omap_pgsz;
+ u32 ret, flags;
+
+ /* we only support mapping a single iommu page for now */
+ omap_pgsz = bytes_to_iopgsz(bytes);
+ if (omap_pgsz < 0) {
+ dev_err(dev, "invalid size to map: %zu\n", bytes);
+ return -EINVAL;
+ }
+
+ dev_dbg(dev, "mapping da 0x%lx to pa 0x%x size 0x%x\n", da, pa, bytes);
+
+ flags = omap_pgsz | prot;
+
+ iotlb_init_entry(&e, da, pa, flags);
+
+ ret = omap_iopgtable_store_entry(oiommu, &e);
+ if (ret)
+ dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret);
+
+ return ret;
+}
+
+static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
+ size_t size)
+{
+ struct omap_iommu_domain *omap_domain = domain->priv;
+ struct omap_iommu *oiommu = omap_domain->iommu_dev;
+ struct device *dev = oiommu->dev;
+
+ dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size);
+
+ return iopgtable_clear_entry(oiommu, da);
+}
+
+static int
+omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
+{
+ struct omap_iommu_domain *omap_domain = domain->priv;
+ struct omap_iommu *oiommu;
+ struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+ int ret = 0;
+
+ spin_lock(&omap_domain->lock);
+
+ /* only a single device is supported per domain for now */
+ if (omap_domain->iommu_dev) {
+ dev_err(dev, "iommu domain is already attached\n");
+ ret = -EBUSY;
+ goto out;
+ }
+
+ /* get a handle to and enable the omap iommu */
+ oiommu = omap_iommu_attach(arch_data->name, omap_domain->pgtable);
+ if (IS_ERR(oiommu)) {
+ ret = PTR_ERR(oiommu);
+ dev_err(dev, "can't get omap iommu: %d\n", ret);
+ goto out;
+ }
+
+ omap_domain->iommu_dev = arch_data->iommu_dev = oiommu;
+ oiommu->domain = domain;
+
+out:
+ spin_unlock(&omap_domain->lock);
+ return ret;
+}
+
+static void omap_iommu_detach_dev(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct omap_iommu_domain *omap_domain = domain->priv;
+ struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+ struct omap_iommu *oiommu = dev_to_omap_iommu(dev);
+
+ spin_lock(&omap_domain->lock);
+
+ /* only a single device is supported per domain for now */
+ if (omap_domain->iommu_dev != oiommu) {
+ dev_err(dev, "invalid iommu device\n");
+ goto out;
+ }
+
+ iopgtable_clear_entry_all(oiommu);
+
+ omap_iommu_detach(oiommu);
+
+ omap_domain->iommu_dev = arch_data->iommu_dev = NULL;
+
+out:
+ spin_unlock(&omap_domain->lock);
+}
+
+static int omap_iommu_domain_init(struct iommu_domain *domain)
+{
+ struct omap_iommu_domain *omap_domain;
+
+ omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL);
+ if (!omap_domain) {
+ pr_err("kzalloc failed\n");
+ goto out;
+ }
+
+ omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL);
+ if (!omap_domain->pgtable) {
+ pr_err("kzalloc failed\n");
+ goto fail_nomem;
+ }
+
+ /*
+ * should never fail, but please keep this around to ensure
+ * we keep the hardware happy
+ */
+ BUG_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE));
+
+ clean_dcache_area(omap_domain->pgtable, IOPGD_TABLE_SIZE);
+ spin_lock_init(&omap_domain->lock);
+
+ domain->priv = omap_domain;
+
+ return 0;
+
+fail_nomem:
+ kfree(omap_domain);
+out:
+ return -ENOMEM;
+}
+
+/* assume device was already detached */
+static void omap_iommu_domain_destroy(struct iommu_domain *domain)
+{
+ struct omap_iommu_domain *omap_domain = domain->priv;
+
+ domain->priv = NULL;
+
+ kfree(omap_domain->pgtable);
+ kfree(omap_domain);
+}
+
+static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
+ unsigned long da)
+{
+ struct omap_iommu_domain *omap_domain = domain->priv;
+ struct omap_iommu *oiommu = omap_domain->iommu_dev;
+ struct device *dev = oiommu->dev;
+ u32 *pgd, *pte;
+ phys_addr_t ret = 0;
+
+ iopgtable_lookup_entry(oiommu, da, &pgd, &pte);
+
+ if (pte) {
+ if (iopte_is_small(*pte))
+ ret = omap_iommu_translate(*pte, da, IOPTE_MASK);
+ else if (iopte_is_large(*pte))
+ ret = omap_iommu_translate(*pte, da, IOLARGE_MASK);
+ else
+ dev_err(dev, "bogus pte 0x%x, da 0x%lx", *pte, da);
+ } else {
+ if (iopgd_is_section(*pgd))
+ ret = omap_iommu_translate(*pgd, da, IOSECTION_MASK);
+ else if (iopgd_is_super(*pgd))
+ ret = omap_iommu_translate(*pgd, da, IOSUPER_MASK);
+ else
+ dev_err(dev, "bogus pgd 0x%x, da 0x%lx", *pgd, da);
+ }
+
+ return ret;
+}
+
+static int omap_iommu_domain_has_cap(struct iommu_domain *domain,
+ unsigned long cap)
+{
+ return 0;
+}
+
+static struct iommu_ops omap_iommu_ops = {
+ .domain_init = omap_iommu_domain_init,
+ .domain_destroy = omap_iommu_domain_destroy,
+ .attach_dev = omap_iommu_attach_dev,
+ .detach_dev = omap_iommu_detach_dev,
+ .map = omap_iommu_map,
+ .unmap = omap_iommu_unmap,
+ .iova_to_phys = omap_iommu_iova_to_phys,
+ .domain_has_cap = omap_iommu_domain_has_cap,
+ .pgsize_bitmap = OMAP_IOMMU_PGSIZES,
+};
+
+static int __init omap_iommu_init(void)
+{
+ struct kmem_cache *p;
+ const unsigned long flags = SLAB_HWCACHE_ALIGN;
+ size_t align = 1 << 10; /* L2 pagetable alignment */
+
+ p = kmem_cache_create("iopte_cache", IOPTE_TABLE_SIZE, align, flags,
+ iopte_cachep_ctor);
+ if (!p)
+ return -ENOMEM;
+ iopte_cachep = p;
+
+ bus_set_iommu(&platform_bus_type, &omap_iommu_ops);
+
+ return platform_driver_register(&omap_iommu_driver);
+}
+module_init(omap_iommu_init);
+
+static void __exit omap_iommu_exit(void)
+{
+ kmem_cache_destroy(iopte_cachep);
+
+ platform_driver_unregister(&omap_iommu_driver);
+}
+module_exit(omap_iommu_exit);
+
+MODULE_DESCRIPTION("omap iommu: tlb and pagetable primitives");
+MODULE_ALIAS("platform:omap-iommu");
+MODULE_AUTHOR("Hiroshi DOYU, Paul Mundt and Toshihiro Kobayashi");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c
new file mode 100644
index 000000000000..2e10c3e0a7ae
--- /dev/null
+++ b/drivers/iommu/omap-iovmm.c
@@ -0,0 +1,747 @@
+/*
+ * omap iommu: simple virtual address space management
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/device.h>
+#include <linux/scatterlist.h>
+#include <linux/iommu.h>
+
+#include <asm/cacheflush.h>
+#include <asm/mach/map.h>
+
+#include <plat/iommu.h>
+#include <plat/iovmm.h>
+
+#include <plat/iopgtable.h>
+
+static struct kmem_cache *iovm_area_cachep;
+
+/* return the offset of the first scatterlist entry in a sg table */
+static unsigned int sgtable_offset(const struct sg_table *sgt)
+{
+ if (!sgt || !sgt->nents)
+ return 0;
+
+ return sgt->sgl->offset;
+}
+
+/* return total bytes of sg buffers */
+static size_t sgtable_len(const struct sg_table *sgt)
+{
+ unsigned int i, total = 0;
+ struct scatterlist *sg;
+
+ if (!sgt)
+ return 0;
+
+ for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+ size_t bytes;
+
+ bytes = sg->length + sg->offset;
+
+ if (!iopgsz_ok(bytes)) {
+ pr_err("%s: sg[%d] not iommu pagesize(%u %u)\n",
+ __func__, i, bytes, sg->offset);
+ return 0;
+ }
+
+ if (i && sg->offset) {
+ pr_err("%s: sg[%d] offset not allowed in internal "
+ "entries\n", __func__, i);
+ return 0;
+ }
+
+ total += bytes;
+ }
+
+ return total;
+}
+#define sgtable_ok(x) (!!sgtable_len(x))
+
+static unsigned max_alignment(u32 addr)
+{
+ int i;
+ unsigned pagesize[] = { SZ_16M, SZ_1M, SZ_64K, SZ_4K, };
+ for (i = 0; i < ARRAY_SIZE(pagesize) && addr & (pagesize[i] - 1); i++)
+ ;
+ return (i < ARRAY_SIZE(pagesize)) ? pagesize[i] : 0;
+}
+
+/*
+ * calculate the optimal number of sg elements from total bytes based on
+ * iommu superpages
+ */
+static unsigned sgtable_nents(size_t bytes, u32 da, u32 pa)
+{
+ unsigned nr_entries = 0, ent_sz;
+
+ if (!IS_ALIGNED(bytes, PAGE_SIZE)) {
+ pr_err("%s: wrong size %08x\n", __func__, bytes);
+ return 0;
+ }
+
+ while (bytes) {
+ ent_sz = max_alignment(da | pa);
+ ent_sz = min_t(unsigned, ent_sz, iopgsz_max(bytes));
+ nr_entries++;
+ da += ent_sz;
+ pa += ent_sz;
+ bytes -= ent_sz;
+ }
+
+ return nr_entries;
+}
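A worked illustration, with made-up addresses, of how the two helpers above coalesce a buffer into superpage-sized entries:

/*
 * sgtable_nents(SZ_1M + SZ_64K, 0x81000000, 0x90100000):
 *
 *   step 1: max_alignment(0x81000000 | 0x90100000) = SZ_1M and
 *           iopgsz_max(0x110000) = SZ_1M       -> one 1M entry
 *   step 2: 0x10000 left, addresses still 1M-aligned,
 *           iopgsz_max(0x10000) = SZ_64K       -> one 64K entry
 *
 *   => 2 scatterlist entries instead of 272 4K-sized ones.
 */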
+
+/* allocate and initialize sg_table header (a kind of 'superblock') */
+static struct sg_table *sgtable_alloc(const size_t bytes, u32 flags,
+ u32 da, u32 pa)
+{
+ unsigned int nr_entries;
+ int err;
+ struct sg_table *sgt;
+
+ if (!bytes)
+ return ERR_PTR(-EINVAL);
+
+ if (!IS_ALIGNED(bytes, PAGE_SIZE))
+ return ERR_PTR(-EINVAL);
+
+ if (flags & IOVMF_LINEAR) {
+ nr_entries = sgtable_nents(bytes, da, pa);
+ if (!nr_entries)
+ return ERR_PTR(-EINVAL);
+ } else
+ nr_entries = bytes / PAGE_SIZE;
+
+ sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt)
+ return ERR_PTR(-ENOMEM);
+
+ err = sg_alloc_table(sgt, nr_entries, GFP_KERNEL);
+ if (err) {
+ kfree(sgt);
+ return ERR_PTR(err);
+ }
+
+ pr_debug("%s: sgt:%p(%d entries)\n", __func__, sgt, nr_entries);
+
+ return sgt;
+}
+
+/* free sg_table header (a kind of superblock) */
+static void sgtable_free(struct sg_table *sgt)
+{
+ if (!sgt)
+ return;
+
+ sg_free_table(sgt);
+ kfree(sgt);
+
+ pr_debug("%s: sgt:%p\n", __func__, sgt);
+}
+
+/* map 'sglist' to a contiguous mpu virtual area and return 'va' */
+static void *vmap_sg(const struct sg_table *sgt)
+{
+ u32 va;
+ size_t total;
+ unsigned int i;
+ struct scatterlist *sg;
+ struct vm_struct *new;
+ const struct mem_type *mtype;
+
+ mtype = get_mem_type(MT_DEVICE);
+ if (!mtype)
+ return ERR_PTR(-EINVAL);
+
+ total = sgtable_len(sgt);
+ if (!total)
+ return ERR_PTR(-EINVAL);
+
+ new = __get_vm_area(total, VM_IOREMAP, VMALLOC_START, VMALLOC_END);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ va = (u32)new->addr;
+
+ for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+ size_t bytes;
+ u32 pa;
+ int err;
+
+ pa = sg_phys(sg) - sg->offset;
+ bytes = sg->length + sg->offset;
+
+ BUG_ON(bytes != PAGE_SIZE);
+
+ err = ioremap_page(va, pa, mtype);
+ if (err)
+ goto err_out;
+
+ va += bytes;
+ }
+
+ flush_cache_vmap((unsigned long)new->addr,
+ (unsigned long)(new->addr + total));
+ return new->addr;
+
+err_out:
+ WARN_ON(1); /* FIXME: cleanup some mpu mappings */
+ vunmap(new->addr);
+ return ERR_PTR(-EAGAIN);
+}
+
+static inline void vunmap_sg(const void *va)
+{
+ vunmap(va);
+}
+
+static struct iovm_struct *__find_iovm_area(struct omap_iommu *obj,
+ const u32 da)
+{
+ struct iovm_struct *tmp;
+
+ list_for_each_entry(tmp, &obj->mmap, list) {
+ if ((da >= tmp->da_start) && (da < tmp->da_end)) {
+ size_t len;
+
+ len = tmp->da_end - tmp->da_start;
+
+ dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n",
+ __func__, tmp->da_start, da, tmp->da_end, len,
+ tmp->flags);
+
+ return tmp;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * omap_find_iovm_area - find iovma which includes @da
+ * @dev: client device
+ * @da: iommu device virtual address
+ *
+ * Find the existing iovma starting at @da
+ */
+struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da)
+{
+ struct omap_iommu *obj = dev_to_omap_iommu(dev);
+ struct iovm_struct *area;
+
+ mutex_lock(&obj->mmap_lock);
+ area = __find_iovm_area(obj, da);
+ mutex_unlock(&obj->mmap_lock);
+
+ return area;
+}
+EXPORT_SYMBOL_GPL(omap_find_iovm_area);
+
+/*
+ * This finds the hole (area) which fits the requested address and len
+ * in the iovma mmap list, and returns the newly allocated iovma.
+ */
+static struct iovm_struct *alloc_iovm_area(struct omap_iommu *obj, u32 da,
+ size_t bytes, u32 flags)
+{
+ struct iovm_struct *new, *tmp;
+ u32 start, prev_end, alignment;
+
+ if (!obj || !bytes)
+ return ERR_PTR(-EINVAL);
+
+ start = da;
+ alignment = PAGE_SIZE;
+
+ if (~flags & IOVMF_DA_FIXED) {
+ /* Don't map address 0 */
+ start = obj->da_start ? obj->da_start : alignment;
+
+ if (flags & IOVMF_LINEAR)
+ alignment = iopgsz_max(bytes);
+ start = roundup(start, alignment);
+ } else if (start < obj->da_start || start > obj->da_end ||
+ obj->da_end - start < bytes) {
+ return ERR_PTR(-EINVAL);
+ }
+
+ tmp = NULL;
+ if (list_empty(&obj->mmap))
+ goto found;
+
+ prev_end = 0;
+ list_for_each_entry(tmp, &obj->mmap, list) {
+
+ if (prev_end > start)
+ break;
+
+ if (tmp->da_start > start && (tmp->da_start - start) >= bytes)
+ goto found;
+
+ if (tmp->da_end >= start && ~flags & IOVMF_DA_FIXED)
+ start = roundup(tmp->da_end + 1, alignment);
+
+ prev_end = tmp->da_end;
+ }
+
+ if ((start >= prev_end) && (obj->da_end - start >= bytes))
+ goto found;
+
+ dev_dbg(obj->dev, "%s: no space to fit %08x(%x) flags: %08x\n",
+ __func__, da, bytes, flags);
+
+ return ERR_PTR(-EINVAL);
+
+found:
+ new = kmem_cache_zalloc(iovm_area_cachep, GFP_KERNEL);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ new->iommu = obj;
+ new->da_start = start;
+ new->da_end = start + bytes;
+ new->flags = flags;
+
+ /*
+ * keep ascending order of iovmas
+ */
+ if (tmp)
+ list_add_tail(&new->list, &tmp->list);
+ else
+ list_add(&new->list, &obj->mmap);
+
+ dev_dbg(obj->dev, "%s: found %08x-%08x-%08x(%x) %08x\n",
+ __func__, new->da_start, start, new->da_end, bytes, flags);
+
+ return new;
+}
+
+static void free_iovm_area(struct omap_iommu *obj, struct iovm_struct *area)
+{
+ size_t bytes;
+
+ BUG_ON(!obj || !area);
+
+ bytes = area->da_end - area->da_start;
+
+ dev_dbg(obj->dev, "%s: %08x-%08x(%x) %08x\n",
+ __func__, area->da_start, area->da_end, bytes, area->flags);
+
+ list_del(&area->list);
+ kmem_cache_free(iovm_area_cachep, area);
+}
+
+/**
+ * omap_da_to_va - convert (d) to (v)
+ * @dev: client device
+ * @da: iommu device virtual address
+ *
+ * Returns mpu virtual addr which corresponds to a given device virtual addr
+ */
+void *omap_da_to_va(struct device *dev, u32 da)
+{
+ struct omap_iommu *obj = dev_to_omap_iommu(dev);
+ void *va = NULL;
+ struct iovm_struct *area;
+
+ mutex_lock(&obj->mmap_lock);
+
+ area = __find_iovm_area(obj, da);
+ if (!area) {
+ dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da);
+ goto out;
+ }
+ va = area->va;
+out:
+ mutex_unlock(&obj->mmap_lock);
+
+ return va;
+}
+EXPORT_SYMBOL_GPL(omap_da_to_va);
+
+static void sgtable_fill_vmalloc(struct sg_table *sgt, void *_va)
+{
+ unsigned int i;
+ struct scatterlist *sg;
+ void *va = _va;
+ void *va_end;
+
+ for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+ struct page *pg;
+ const size_t bytes = PAGE_SIZE;
+
+ /*
+ * iommu 'superpage' isn't supported with 'omap_iommu_vmalloc()'
+ */
+ pg = vmalloc_to_page(va);
+ BUG_ON(!pg);
+ sg_set_page(sg, pg, bytes, 0);
+
+ va += bytes;
+ }
+
+ va_end = _va + PAGE_SIZE * i;
+}
+
+static inline void sgtable_drain_vmalloc(struct sg_table *sgt)
+{
+ /*
+ * Actually this is not necessary at all; it just exists for
+ * consistency and code readability.
+ */
+ BUG_ON(!sgt);
+}
+
+/* create 'da' <-> 'pa' mapping from 'sgt' */
+static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
+ const struct sg_table *sgt, u32 flags)
+{
+ int err;
+ unsigned int i, j;
+ struct scatterlist *sg;
+ u32 da = new->da_start;
+
+ if (!domain || !sgt)
+ return -EINVAL;
+
+ BUG_ON(!sgtable_ok(sgt));
+
+ for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+ u32 pa;
+ size_t bytes;
+
+ pa = sg_phys(sg) - sg->offset;
+ bytes = sg->length + sg->offset;
+
+ flags &= ~IOVMF_PGSZ_MASK;
+
+ if (bytes_to_iopgsz(bytes) < 0)
+ goto err_out;
+
+ pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
+ i, da, pa, bytes);
+
+ err = iommu_map(domain, da, pa, bytes, flags);
+ if (err)
+ goto err_out;
+
+ da += bytes;
+ }
+ return 0;
+
+err_out:
+ da = new->da_start;
+
+ for_each_sg(sgt->sgl, sg, i, j) {
+ size_t bytes;
+
+ bytes = sg->length + sg->offset;
+
+ /* ignore failures.. we're already handling one */
+ iommu_unmap(domain, da, bytes);
+
+ da += bytes;
+ }
+ return err;
+}
+
+/* release 'da' <-> 'pa' mapping */
+static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
+ struct iovm_struct *area)
+{
+ u32 start;
+ size_t total = area->da_end - area->da_start;
+ const struct sg_table *sgt = area->sgt;
+ struct scatterlist *sg;
+ int i;
+ size_t unmapped;
+
+ BUG_ON(!sgtable_ok(sgt));
+ BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
+
+ start = area->da_start;
+ for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+ size_t bytes;
+
+ bytes = sg->length + sg->offset;
+
+ unmapped = iommu_unmap(domain, start, bytes);
+ if (unmapped < bytes)
+ break;
+
+ dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
+ __func__, start, bytes, area->flags);
+
+ BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE));
+
+ total -= bytes;
+ start += bytes;
+ }
+ BUG_ON(total);
+}
+
+/* template function for all unmapping */
+static struct sg_table *unmap_vm_area(struct iommu_domain *domain,
+ struct omap_iommu *obj, const u32 da,
+ void (*fn)(const void *), u32 flags)
+{
+ struct sg_table *sgt = NULL;
+ struct iovm_struct *area;
+
+ if (!IS_ALIGNED(da, PAGE_SIZE)) {
+ dev_err(obj->dev, "%s: alignment err(%08x)\n", __func__, da);
+ return NULL;
+ }
+
+ mutex_lock(&obj->mmap_lock);
+
+ area = __find_iovm_area(obj, da);
+ if (!area) {
+ dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da);
+ goto out;
+ }
+
+ if ((area->flags & flags) != flags) {
+ dev_err(obj->dev, "%s: wrong flags(%08x)\n", __func__,
+ area->flags);
+ goto out;
+ }
+ sgt = (struct sg_table *)area->sgt;
+
+ unmap_iovm_area(domain, obj, area);
+
+ fn(area->va);
+
+ dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", __func__,
+ area->da_start, da, area->da_end,
+ area->da_end - area->da_start, area->flags);
+
+ free_iovm_area(obj, area);
+out:
+ mutex_unlock(&obj->mmap_lock);
+
+ return sgt;
+}
+
+static u32 map_iommu_region(struct iommu_domain *domain, struct omap_iommu *obj,
+ u32 da, const struct sg_table *sgt, void *va,
+ size_t bytes, u32 flags)
+{
+ int err = -ENOMEM;
+ struct iovm_struct *new;
+
+ mutex_lock(&obj->mmap_lock);
+
+ new = alloc_iovm_area(obj, da, bytes, flags);
+ if (IS_ERR(new)) {
+ err = PTR_ERR(new);
+ goto err_alloc_iovma;
+ }
+ new->va = va;
+ new->sgt = sgt;
+
+ if (map_iovm_area(domain, new, sgt, new->flags))
+ goto err_map;
+
+ mutex_unlock(&obj->mmap_lock);
+
+ dev_dbg(obj->dev, "%s: da:%08x(%x) flags:%08x va:%p\n",
+ __func__, new->da_start, bytes, new->flags, va);
+
+ return new->da_start;
+
+err_map:
+ free_iovm_area(obj, new);
+err_alloc_iovma:
+ mutex_unlock(&obj->mmap_lock);
+ return err;
+}
+
+static inline u32
+__iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj,
+ u32 da, const struct sg_table *sgt,
+ void *va, size_t bytes, u32 flags)
+{
+ return map_iommu_region(domain, obj, da, sgt, va, bytes, flags);
+}
+
+/**
+ * omap_iommu_vmap - (d)-(p)-(v) address mapper
+ * @domain: iommu domain
+ * @dev: client device
+ * @da: contiguous iommu virtual memory
+ * @sgt: address of scatter gather table
+ * @flags: iovma and page property
+ *
+ * Creates a 1-n-1 mapping with the given @sgt and returns @da.
+ * All @sgt elements must be io page size aligned.
+ */
+u32 omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
+ const struct sg_table *sgt, u32 flags)
+{
+ struct omap_iommu *obj = dev_to_omap_iommu(dev);
+ size_t bytes;
+ void *va = NULL;
+
+ if (!obj || !obj->dev || !sgt)
+ return -EINVAL;
+
+ bytes = sgtable_len(sgt);
+ if (!bytes)
+ return -EINVAL;
+ bytes = PAGE_ALIGN(bytes);
+
+ if (flags & IOVMF_MMIO) {
+ va = vmap_sg(sgt);
+ if (IS_ERR(va))
+ return PTR_ERR(va);
+ }
+
+ flags |= IOVMF_DISCONT;
+ flags |= IOVMF_MMIO;
+
+ da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags);
+ if (IS_ERR_VALUE(da))
+ vunmap_sg(va);
+
+ return da + sgtable_offset(sgt);
+}
+EXPORT_SYMBOL_GPL(omap_iommu_vmap);
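+
+/*
+ * Illustrative usage sketch (editor's addition, hypothetical names; not
+ * from the original patch): a caller that owns an io-page-aligned
+ * sg_table could map it into its device's virtual address space like
+ * this. Real callers may need additional IOVMF_* attribute flags;
+ * passing IOVMF_MMIO would also create a cpu-side mapping via vmap_sg().
+ */
+static inline u32 example_map_sgt(struct iommu_domain *domain,
+ struct device *dev, const struct sg_table *sgt)
+{
+ /* da == 0 and no IOVMF_DA_FIXED: let the allocator choose the da */
+ u32 da = omap_iommu_vmap(domain, dev, 0, sgt, 0);
+
+ if (IS_ERR_VALUE(da))
+ dev_err(dev, "omap_iommu_vmap failed: %d\n", (int)da);
+
+ return da;
+}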
+
+/**
+ * omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()'
+ * @domain: iommu domain
+ * @dev: client device
+ * @da: iommu device virtual address
+ *
+ * Frees the iommu virtually contiguous memory area starting at
+ * @da, which was returned by 'omap_iommu_vmap()'.
+ */
+struct sg_table *
+omap_iommu_vunmap(struct iommu_domain *domain, struct device *dev, u32 da)
+{
+ struct omap_iommu *obj = dev_to_omap_iommu(dev);
+ struct sg_table *sgt;
+ /*
+ * 'sgt' is allocated by the caller before 'omap_iommu_vmap()' is
+ * called. Just return 'sgt' to the caller to free.
+ */
+ da &= PAGE_MASK;
+ sgt = unmap_vm_area(domain, obj, da, vunmap_sg,
+ IOVMF_DISCONT | IOVMF_MMIO);
+ if (!sgt)
+ dev_dbg(obj->dev, "%s: No sgt\n", __func__);
+ return sgt;
+}
+EXPORT_SYMBOL_GPL(omap_iommu_vunmap);
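+
+/*
+ * Illustrative usage sketch (editor's addition, hypothetical names; not
+ * from the original patch): the counterpart of the mapping example
+ * above. omap_iommu_vunmap() hands the original sg_table back, and it
+ * is up to the caller to free it; here it is assumed the table was
+ * built with sg_alloc_table() on a kmalloc()ed sg_table.
+ */
+static inline void example_unmap_sgt(struct iommu_domain *domain,
+ struct device *dev, u32 da)
+{
+ struct sg_table *sgt = omap_iommu_vunmap(domain, dev, da);
+
+ if (sgt) {
+ sg_free_table(sgt);
+ kfree(sgt);
+ }
+}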
+
+/**
+ * omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper
+ * @domain: iommu domain
+ * @dev: client device
+ * @da: contiguous iommu virtual memory
+ * @bytes: allocation size
+ * @flags: iovma and page property
+ *
+ * Allocates @bytes linearly, creates a 1-n-1 mapping and returns
+ * @da, which might be adjusted if 'IOVMF_DA_FIXED' is not set.
+ */
+u32
+omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, u32 da,
+ size_t bytes, u32 flags)
+{
+ struct omap_iommu *obj = dev_to_omap_iommu(dev);
+ void *va;
+ struct sg_table *sgt;
+
+ if (!obj || !obj->dev || !bytes)
+ return -EINVAL;
+
+ bytes = PAGE_ALIGN(bytes);
+
+ va = vmalloc(bytes);
+ if (!va)
+ return -ENOMEM;
+
+ flags |= IOVMF_DISCONT;
+ flags |= IOVMF_ALLOC;
+
+ sgt = sgtable_alloc(bytes, flags, da, 0);
+ if (IS_ERR(sgt)) {
+ da = PTR_ERR(sgt);
+ goto err_sgt_alloc;
+ }
+ sgtable_fill_vmalloc(sgt, va);
+
+ da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags);
+ if (IS_ERR_VALUE(da))
+ goto err_iommu_vmap;
+
+ return da;
+
+err_iommu_vmap:
+ sgtable_drain_vmalloc(sgt);
+ sgtable_free(sgt);
+err_sgt_alloc:
+ vfree(va);
+ return da;
+}
+EXPORT_SYMBOL_GPL(omap_iommu_vmalloc);
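+
+/*
+ * Illustrative usage sketch (editor's addition, hypothetical names; not
+ * from the original patch): allocate a device-visible, virtually
+ * contiguous buffer. Real callers may also pass IOVMF_* attribute
+ * flags and, together with IOVMF_DA_FIXED, a specific 'da'.
+ */
+static inline u32 example_alloc_buffer(struct iommu_domain *domain,
+ struct device *dev, size_t bytes)
+{
+ /* da == 0 and no IOVMF_DA_FIXED: let the allocator choose the da */
+ u32 da = omap_iommu_vmalloc(domain, dev, 0, bytes, 0);
+
+ if (IS_ERR_VALUE(da))
+ dev_err(dev, "omap_iommu_vmalloc(%zu) failed: %d\n",
+ bytes, (int)da);
+
+ return da;
+}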
+
+/**
+ * omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()'
+ * @domain: iommu domain
+ * @dev: client device
+ * @da: iommu device virtual address
+ *
+ * Frees the iommu virtually contiguous memory area starting at
+ * @da, as obtained from 'omap_iommu_vmalloc()'.
+ */
+void omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
+ const u32 da)
+{
+ struct omap_iommu *obj = dev_to_omap_iommu(dev);
+ struct sg_table *sgt;
+
+ sgt = unmap_vm_area(domain, obj, da, vfree,
+ IOVMF_DISCONT | IOVMF_ALLOC);
+ if (!sgt)
+ dev_dbg(obj->dev, "%s: No sgt\n", __func__);
+ sgtable_free(sgt);
+}
+EXPORT_SYMBOL_GPL(omap_iommu_vfree);
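+
+/*
+ * Illustrative usage sketch (editor's addition, hypothetical names; not
+ * from the original patch): the release path pairing with the
+ * allocation example above. A single omap_iommu_vfree() call tears
+ * down the iommu mapping and frees the vmalloc()ed backing memory.
+ */
+static inline void example_free_buffer(struct iommu_domain *domain,
+ struct device *dev, u32 da)
+{
+ omap_iommu_vfree(domain, dev, da);
+}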
+
+static int __init iovmm_init(void)
+{
+ const unsigned long flags = SLAB_HWCACHE_ALIGN;
+ struct kmem_cache *p;
+
+ p = kmem_cache_create("iovm_area_cache", sizeof(struct iovm_struct), 0,
+ flags, NULL);
+ if (!p)
+ return -ENOMEM;
+ iovm_area_cachep = p;
+
+ return 0;
+}
+module_init(iovmm_init);
+
+static void __exit iovmm_exit(void)
+{
+ kmem_cache_destroy(iovm_area_cachep);
+}
+module_exit(iovmm_exit);
+
+MODULE_DESCRIPTION("omap iommu: simple virtual address space management");
+MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>");
+MODULE_LICENSE("GPL v2");