From cca547e9aa3a6d561fe65e75a4bb2c18d80c541a Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 17 Dec 2014 17:57:38 +0100
Subject: ARM: 8249/1: mm: dump: don't skip regions

Currently the arm page table dumping code starts dumping page tables
from USER_PGTABLES_CEILING. This is unnecessary for skipping any
entries related to userspace, as swapper_pg_dir does not contain such
entries, and it results in a couple of unfortunate side effects.

Firstly, any kernel mappings which might exist below
USER_PGTABLES_CEILING will not be accounted in the dump output. This
masks any entries erroneously created below this address.

Secondly, if the final page table entry walked is part of a valid
mapping, the page table dumping code will not log the region this entry
is part of, as the final note_page call in walk_pgd will trigger an
early return when 0 < USER_PGTABLES_CEILING. Luckily this isn't seen on
contemporary systems as they typically don't have enough RAM to extend
the linear mapping right to the end of the address space.

Due to the way addr is constructed in the walk_* functions, it can
never be less than USER_PGTABLES_CEILING when walking the page tables,
so it is not necessary to avoid dereferencing invalid table addresses.
The existing checks for st->current_prot and st->marker[1].start_address
are sufficient to ensure we will not print and/or dereference garbage
when trying to log information.

This patch removes both problematic uses of USER_PGTABLES_CEILING from
the arm page table dumping code, preventing both of these issues. We
will now report any low mappings, and the final note_page call will not
return early, ensuring all regions are logged.

Signed-off-by: Mark Rutland
Cc: Steve Capper
Cc: Kees Cook
Cc: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/dump.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index 59424937e52b..9fe8e241335c 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -220,9 +220,6 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u
 	static const char units[] = "KMGTPE";
 	u64 prot = val & pg_level[level].mask;
 
-	if (addr < USER_PGTABLES_CEILING)
-		return;
-
 	if (!st->level) {
 		st->level = level;
 		st->current_prot = prot;
@@ -308,15 +305,13 @@ static void walk_pgd(struct seq_file *m)
 	pgd_t *pgd = swapper_pg_dir;
 	struct pg_state st;
 	unsigned long addr;
-	unsigned i, pgdoff = USER_PGTABLES_CEILING / PGDIR_SIZE;
+	unsigned i;
 
 	memset(&st, 0, sizeof(st));
 	st.seq = m;
 	st.marker = address_markers;
 
-	pgd += pgdoff;
-
-	for (i = pgdoff; i < PTRS_PER_PGD; i++, pgd++) {
+	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
 		addr = i * PGDIR_SIZE;
 		if (!pgd_none(*pgd)) {
 			walk_pud(&st, pgd, addr);
--
cgit v1.2.3

From ac08468867e99bc02b22baf4e58bc3537e9d852c Mon Sep 17 00:00:00 2001
From: Grygorii Strashko
Date: Tue, 23 Dec 2014 19:36:55 +0100
Subject: ARM: 8253/1: mm: use phys_addr_t type in map_lowmem() for kernel mem region

Currently the local variables kernel_x_start and kernel_x_end are
defined using the 'unsigned long' type, which is wrong because they
represent a physical memory range and will be calculated incorrectly
if LPAE is enabled. As a result, all following code in map_lowmem()
will not work correctly.

For example, Keystone 2 boot is broken because
  kernel_x_start == 0x0000 0000
  kernel_x_end   == 0x0080 0000

instead of
  kernel_x_start == 0x0000 0008 0000 0000
  kernel_x_end   == 0x0000 0008 0080 0000

and, as a result, the whole of low memory is mapped with MT_MEMORY_RW
permissions by this code (start >= kernel_x_end):

		} else if (start >= kernel_x_end) {
			map.pfn = __phys_to_pfn(start);
			map.virtual = __phys_to_virt(start);
			map.length = end - start;
			map.type = MT_MEMORY_RW;

			create_mapping(&map);
		}

Hence, fix it by using the phys_addr_t type for the variables
kernel_x_start and kernel_x_end.

Tested-by: Murali Karicheri
Signed-off-by: Grygorii Strashko
Signed-off-by: Russell King
---
 arch/arm/mm/mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index cda7c40999b6..4e6ef896c619 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1329,8 +1329,8 @@ static void __init kmap_init(void)
 static void __init map_lowmem(void)
 {
 	struct memblock_region *reg;
-	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
-	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+	phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
+	phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
 
 	/* Map all the lowmem memory banks. */
 	for_each_memblock(memory, reg) {
--
cgit v1.2.3

From 1e3479225acbb7ae048ac30fb7c6090fa7f0df02 Mon Sep 17 00:00:00 2001
From: Victor Kamensky
Date: Fri, 9 Jan 2015 18:55:45 +0100
Subject: ARM: 8275/1: mm: fix PMD_SECT_RDONLY undeclared compile error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the v3.19-rc3 tree, when CONFIG_ARM_LPAE and CONFIG_DEBUG_RODATA are
enabled, the image fails to compile with the following error:

arch/arm/mm/init.c:661:14: error: ‘PMD_SECT_RDONLY’ undeclared here (not in a function)

It seems that the '80d6b0c ARM: mm: allow text and rodata sections to
be read-only' and 'ded9477 ARM: 8109/1: mm: Modify pte_write and
pmd_write logic for LPAE' commits crossed. 80d6b0c uses the
PMD_SECT_RDONLY macro, but ded9477 renames it and uses the software
bits L_PMD_SECT_RDONLY instead.

The fix is to use L_PMD_SECT_RDONLY instead of PMD_SECT_RDONLY, as
ded9477 does in other places.

Signed-off-by: Victor Kamensky
Acked-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 98ad9c79ea0e..2495c8cb47ba 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -658,8 +658,8 @@ static struct section_perm ro_perms[] = {
 		.start = (unsigned long)_stext,
 		.end = (unsigned long)__init_begin,
 #ifdef CONFIG_ARM_LPAE
-		.mask = ~PMD_SECT_RDONLY,
-		.prot = PMD_SECT_RDONLY,
+		.mask = ~L_PMD_SECT_RDONLY,
+		.prot = L_PMD_SECT_RDONLY,
 #else
 		.mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE),
 		.prot = PMD_SECT_APX | PMD_SECT_AP_WRITE,
--
cgit v1.2.3

From 00218241aa0846e75d31b1dbadb5f8a76be1cc97 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski
Date: Thu, 8 Jan 2015 07:49:41 +0100
Subject: ARM: 8258/1: l2c: use l2c_write_sec() for restoring latency and filter regs

All four registers for the latency and filter settings cannot be
written in non-secure mode, so they should go through l2c_write_sec().
More on this can be found in the CoreLink Level 2 Cache Controller
L2C-310 Technical Reference Manual, 3.2. Register summary, table 3.1.
This has been checked against the TRM for r3p3, but it should be
uniform across all revisions.
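
For reference, the helper that these writes go through is a small
dispatcher in cache-l2x0.c. Roughly (a simplified sketch, not
necessarily the exact tree contents):

	static void l2c_write_sec(unsigned long val, void __iomem *base,
				  unsigned reg)
	{
		if (outer_cache.write_sec)
			outer_cache.write_sec(val, reg);
		else
			writel_relaxed(val, base + reg);
	}

On platforms where these registers are secure-only, the .write_sec()
hook typically wraps a call into the secure monitor; everywhere else
it degenerates to a plain writel_relaxed().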

Reported-by: Nishanth Menon
Suggested-by: Tomasz Figa
Signed-off-by: Marek Szyprowski
Tested-by: Nishanth Menon
Acked-by: Nishanth Menon
Acked-by: Tony Lindgren
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 5e65ca8dea62..b83c401ca50c 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -623,14 +623,14 @@ static void l2c310_resume(void)
 		unsigned revision;
 
 		/* restore pl310 setup */
-		writel_relaxed(l2x0_saved_regs.tag_latency,
-			       base + L310_TAG_LATENCY_CTRL);
-		writel_relaxed(l2x0_saved_regs.data_latency,
-			       base + L310_DATA_LATENCY_CTRL);
-		writel_relaxed(l2x0_saved_regs.filter_end,
-			       base + L310_ADDR_FILTER_END);
-		writel_relaxed(l2x0_saved_regs.filter_start,
-			       base + L310_ADDR_FILTER_START);
+		l2c_write_sec(l2x0_saved_regs.tag_latency, base,
+			      L310_TAG_LATENCY_CTRL);
+		l2c_write_sec(l2x0_saved_regs.data_latency, base,
+			      L310_DATA_LATENCY_CTRL);
+		l2c_write_sec(l2x0_saved_regs.filter_end, base,
+			      L310_ADDR_FILTER_END);
+		l2c_write_sec(l2x0_saved_regs.filter_start, base,
+			      L310_ADDR_FILTER_START);
 
 		revision = readl_relaxed(base + L2X0_CACHE_ID) &
 			L2X0_CACHE_ID_RTL_MASK;
@@ -1135,28 +1135,28 @@ static void __init l2c310_of_parse(const struct device_node *np,
 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
 	if (tag[0] && tag[1] && tag[2])
-		writel_relaxed(
+		l2c_write_sec(
 			L310_LATENCY_CTRL_RD(tag[0] - 1) |
 			L310_LATENCY_CTRL_WR(tag[1] - 1) |
 			L310_LATENCY_CTRL_SETUP(tag[2] - 1),
-			l2x0_base + L310_TAG_LATENCY_CTRL);
+			l2x0_base, L310_TAG_LATENCY_CTRL);
 
 	of_property_read_u32_array(np, "arm,data-latency",
 				   data, ARRAY_SIZE(data));
 	if (data[0] && data[1] && data[2])
-		writel_relaxed(
+		l2c_write_sec(
 			L310_LATENCY_CTRL_RD(data[0] - 1) |
 			L310_LATENCY_CTRL_WR(data[1] - 1) |
 			L310_LATENCY_CTRL_SETUP(data[2] - 1),
-			l2x0_base + L310_DATA_LATENCY_CTRL);
+			l2x0_base, L310_DATA_LATENCY_CTRL);
 
 	of_property_read_u32_array(np, "arm,filter-ranges",
 				   filter, ARRAY_SIZE(filter));
 	if (filter[1]) {
-		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
-			       l2x0_base + L310_ADDR_FILTER_END);
-		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
-			       l2x0_base + L310_ADDR_FILTER_START);
+		l2c_write_sec(ALIGN(filter[0] + filter[1], SZ_1M),
+			      l2x0_base, L310_ADDR_FILTER_END);
+		l2c_write_sec((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
+			      l2x0_base, L310_ADDR_FILTER_START);
 	}
 
 	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
--
cgit v1.2.3

From 6b49241ac2525818508ee2baff9a58541c65421c Mon Sep 17 00:00:00 2001
From: Tomasz Figa
Date: Thu, 8 Jan 2015 07:50:29 +0100
Subject: ARM: 8259/1: l2c: Refactor the driver to use commit-like interface

Certain implementations of secure hypervisors (namely the one found on
Samsung Exynos-based boards) do not provide access to individual L2C
registers. This makes the .write_sec()-based interface insufficient
and provokes ugly hacks.

This patch is the first step towards making the driver not rely on the
availability of writes to individual registers. This is achieved by
refactoring the driver to use a commit-like operation scheme: all
register values are prepared first and stored in an instance of the
l2x0_regs struct, and then a single callback is responsible for
flushing those values to the hardware.
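
For reference, the l2x0_regs staging structure mentioned above lives in
arch/arm/include/asm/hardware/cache-l2x0.h. An abridged sketch, listing
only the fields used by the hunks in this series (the real header may
carry more):

	struct l2x0_regs {
		unsigned long aux_ctrl;
		/* L310-specific */
		unsigned long tag_latency;
		unsigned long data_latency;
		unsigned long filter_start;
		unsigned long filter_end;
		unsigned long prefetch_ctrl;
		unsigned long pwr_ctrl;
		/* Aurora / Tauros3 specific */
		unsigned long ctrl;
		unsigned long aux2_ctrl;
	};

A single global instance, l2x0_saved_regs, is what the staging code
below fills in and what the new configure step flushes to the hardware.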

[mszyprow: rebased onto 'ARM: l2c: use l2c_write_sec() for restoring
 latency and filter regs' patch]

Signed-off-by: Tomasz Figa
Signed-off-by: Marek Szyprowski
Tested-by: Nishanth Menon
Acked-by: Tony Lindgren
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 212 ++++++++++++++++++++++++++---------------------
 1 file changed, 116 insertions(+), 96 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index b83c401ca50c..dde0d54ac41e 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -41,12 +41,14 @@ struct l2c_init_data {
 	void (*enable)(void __iomem *, u32, unsigned);
 	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
 	void (*save)(void __iomem *);
+	void (*configure)(void __iomem *);
 	struct outer_cache_fns outer_cache;
 };
 
 #define CACHE_LINE_SIZE 32
 
 static void __iomem *l2x0_base;
+static const struct l2c_init_data *l2x0_data;
 static DEFINE_RAW_SPINLOCK(l2x0_lock);
 static u32 l2x0_way_mask;	/* Bitmask of active ways */
 static u32 l2x0_size;
@@ -106,6 +108,14 @@ static inline void l2c_unlock(void __iomem *base, unsigned num)
 	}
 }
 
+static void l2c_configure(void __iomem *base)
+{
+	if (l2x0_data->configure)
+		l2x0_data->configure(base);
+
+	l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
+}
+
 /*
  * Enable the L2 cache controller. This function must only be
  * called when the cache controller is known to be disabled.
@@ -114,7 +124,12 @@ static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
 {
 	unsigned long flags;
 
-	l2c_write_sec(aux, base, L2X0_AUX_CTRL);
+	/* Do not touch the controller if already enabled. */
+	if (readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)
+		return;
+
+	l2x0_saved_regs.aux_ctrl = aux;
+	l2c_configure(base);
 
 	l2c_unlock(base, num_lock);
@@ -208,6 +223,11 @@ static void l2c_save(void __iomem *base)
 	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
 }
 
+static void l2c_resume(void)
+{
+	l2c_enable(l2x0_base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock);
+}
+
 /*
  * L2C-210 specific code.
 *
@@ -288,14 +308,6 @@ static void l2c210_sync(void)
 	__l2c210_cache_sync(l2x0_base);
 }
 
-static void l2c210_resume(void)
-{
-	void __iomem *base = l2x0_base;
-
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1);
-}
-
 static const struct l2c_init_data l2c210_data __initconst = {
 	.type = "L2C-210",
 	.way_size_0 = SZ_8K,
@@ -309,7 +321,7 @@ static const struct l2c_init_data l2c210_data __initconst = {
 		.flush_all = l2c210_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c210_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -466,7 +478,7 @@ static const struct l2c_init_data l2c220_data = {
 		.flush_all = l2c220_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c220_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -615,39 +627,29 @@ static void __init l2c310_save(void __iomem *base)
 			L310_POWER_CTRL);
 }
 
-static void l2c310_resume(void)
+static void l2c310_configure(void __iomem *base)
 {
-	void __iomem *base = l2x0_base;
+	unsigned revision;
 
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		unsigned revision;
-
-		/* restore pl310 setup */
-		l2c_write_sec(l2x0_saved_regs.tag_latency, base,
-			      L310_TAG_LATENCY_CTRL);
-		l2c_write_sec(l2x0_saved_regs.data_latency, base,
-			      L310_DATA_LATENCY_CTRL);
-		l2c_write_sec(l2x0_saved_regs.filter_end, base,
-			      L310_ADDR_FILTER_END);
-		l2c_write_sec(l2x0_saved_regs.filter_start, base,
-			      L310_ADDR_FILTER_START);
-
-		revision = readl_relaxed(base + L2X0_CACHE_ID) &
-			L2X0_CACHE_ID_RTL_MASK;
-
-		if (revision >= L310_CACHE_ID_RTL_R2P0)
-			l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
-				      L310_PREFETCH_CTRL);
-		if (revision >= L310_CACHE_ID_RTL_R3P0)
-			l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
-				      L310_POWER_CTRL);
-
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
-
-		/* Re-enable full-line-of-zeros for Cortex-A9 */
-		if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
-			set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
-	}
+	/* restore pl310 setup */
+	l2c_write_sec(l2x0_saved_regs.tag_latency, base,
+		      L310_TAG_LATENCY_CTRL);
+	l2c_write_sec(l2x0_saved_regs.data_latency, base,
+		      L310_DATA_LATENCY_CTRL);
+	l2c_write_sec(l2x0_saved_regs.filter_end, base,
+		      L310_ADDR_FILTER_END);
+	l2c_write_sec(l2x0_saved_regs.filter_start, base,
+		      L310_ADDR_FILTER_START);
+
+	revision = readl_relaxed(base + L2X0_CACHE_ID) &
+		L2X0_CACHE_ID_RTL_MASK;
+
+	if (revision >= L310_CACHE_ID_RTL_R2P0)
+		l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
+			      L310_PREFETCH_CTRL);
+	if (revision >= L310_CACHE_ID_RTL_R3P0)
+		l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
+			      L310_POWER_CTRL);
 }
 
 static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data)
@@ -699,6 +701,23 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
 		aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
 	}
 
+	/* r3p0 or later has power control register */
+	if (rev >= L310_CACHE_ID_RTL_R3P0)
+		l2x0_saved_regs.pwr_ctrl = L310_DYNAMIC_CLK_GATING_EN |
+					   L310_STNDBY_MODE_EN;
+
+	/*
+	 * Always enable non-secure access to the lockdown registers -
+	 * we write to them as part of the L2C enable sequence so they
+	 * need to be accessible.
+	 */
+	aux |= L310_AUX_CTRL_NS_LOCKDOWN;
+
+	l2c_enable(base, aux, num_lock);
+
+	/* Read back resulting AUX_CTRL value as it could have been altered. */
+	aux = readl_relaxed(base + L2X0_AUX_CTRL);
+
 	if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
 		u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);
 
@@ -712,23 +731,12 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
 	if (rev >= L310_CACHE_ID_RTL_R3P0) {
 		u32 power_ctrl;
 
-		l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN,
-			      base, L310_POWER_CTRL);
 		power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
 		pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
 			power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
 			power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
 	}
 
-	/*
-	 * Always enable non-secure access to the lockdown registers -
-	 * we write to them as part of the L2C enable sequence so they
-	 * need to be accessible.
-	 */
-	aux |= L310_AUX_CTRL_NS_LOCKDOWN;
-
-	l2c_enable(base, aux, num_lock);
-
 	if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) {
 		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
 		cpu_notifier(l2c310_cpu_enable_flz, 0);
@@ -760,11 +768,11 @@ static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
 
 	if (revision >= L310_CACHE_ID_RTL_R3P0 &&
 	    revision < L310_CACHE_ID_RTL_R3P2) {
-		u32 val = readl_relaxed(base + L310_PREFETCH_CTRL);
+		u32 val = l2x0_saved_regs.prefetch_ctrl;
 		/* I don't think bit23 is required here... but iMX6 does so */
 		if (val & (BIT(30) | BIT(23))) {
 			val &= ~(BIT(30) | BIT(23));
-			l2c_write_sec(val, base, L310_PREFETCH_CTRL);
+			l2x0_saved_regs.prefetch_ctrl = val;
 			errata[n++] = "752271";
 		}
 	}
@@ -800,6 +808,15 @@ static void l2c310_disable(void)
 	l2c_disable();
 }
 
+static void l2c310_resume(void)
+{
+	l2c_resume();
+
+	/* Re-enable full-line-of-zeros for Cortex-A9 */
+	if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+}
+
 static const struct l2c_init_data l2c310_init_fns __initconst = {
 	.type = "L2C-310",
 	.way_size_0 = SZ_8K,
@@ -807,6 +824,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -818,13 +836,21 @@ static const struct l2c_init_data l2c310_init_fns __initconst = {
 	},
 };
 
-static void __init __l2c_init(const struct l2c_init_data *data,
-	u32 aux_val, u32 aux_mask, u32 cache_id)
+static int __init __l2c_init(const struct l2c_init_data *data,
+			     u32 aux_val, u32 aux_mask, u32 cache_id)
 {
 	struct outer_cache_fns fns;
 	unsigned way_size_bits, ways;
 	u32 aux, old_aux;
 
+	/*
+	 * Save the pointer globally so that callbacks which do not receive
+	 * context from callers can access the structure.
+	 */
+	l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
+	if (!l2x0_data)
+		return -ENOMEM;
+
 	/*
	 * Sanity check the aux values. aux_mask is the bits we preserve
 	 * from reading the hardware register, and aux_val is the bits we
@@ -910,6 +936,8 @@ static void __init __l2c_init(const struct l2c_init_data *data,
 		data->type, ways, l2x0_size >> 10);
 	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
 		data->type, cache_id, aux);
+
+	return 0;
 }
 
 void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
@@ -936,6 +964,10 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 		break;
 	}
 
+	/* Read back current (default) hardware configuration */
+	if (data->save)
+		data->save(l2x0_base);
+
 	__l2c_init(data, aux_val, aux_mask, cache_id);
 }
 
@@ -1102,7 +1134,7 @@ static const struct l2c_init_data of_l2c210_data __initconst = {
 		.flush_all = l2c210_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c210_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -1120,7 +1152,7 @@ static const struct l2c_init_data of_l2c220_data __initconst = {
 		.flush_all = l2c220_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c220_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -1135,28 +1167,26 @@ static void __init l2c310_of_parse(const struct device_node *np,
 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
 	if (tag[0] && tag[1] && tag[2])
-		l2c_write_sec(
+		l2x0_saved_regs.tag_latency =
 			L310_LATENCY_CTRL_RD(tag[0] - 1) |
 			L310_LATENCY_CTRL_WR(tag[1] - 1) |
-			L310_LATENCY_CTRL_SETUP(tag[2] - 1),
-			l2x0_base, L310_TAG_LATENCY_CTRL);
+			L310_LATENCY_CTRL_SETUP(tag[2] - 1);
 
 	of_property_read_u32_array(np, "arm,data-latency",
 				   data, ARRAY_SIZE(data));
 	if (data[0] && data[1] && data[2])
-		l2c_write_sec(
+		l2x0_saved_regs.data_latency =
 			L310_LATENCY_CTRL_RD(data[0] - 1) |
 			L310_LATENCY_CTRL_WR(data[1] - 1) |
-			L310_LATENCY_CTRL_SETUP(data[2] - 1),
-			l2x0_base, L310_DATA_LATENCY_CTRL);
+			L310_LATENCY_CTRL_SETUP(data[2] - 1);
 
 	of_property_read_u32_array(np, "arm,filter-ranges",
 				   filter, ARRAY_SIZE(filter));
 	if (filter[1]) {
-		l2c_write_sec(ALIGN(filter[0] + filter[1], SZ_1M),
-			      l2x0_base, L310_ADDR_FILTER_END);
-		l2c_write_sec((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
-			      l2x0_base, L310_ADDR_FILTER_START);
+		l2x0_saved_regs.filter_end =
+			ALIGN(filter[0] + filter[1], SZ_1M);
+		l2x0_saved_regs.filter_start = (filter[0] & ~(SZ_1M - 1))
+			| L310_ADDR_FILTER_EN;
 	}
 
 	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
@@ -1188,6 +1218,7 @@ static const struct l2c_init_data of_l2c310_data __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1216,6 +1247,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1330,16 +1362,6 @@ static void aurora_save(void __iomem *base)
 	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
 }
 
-static void aurora_resume(void)
-{
-	void __iomem *base = l2x0_base;
-
-	if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
-		writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
-	}
-}
-
 /*
  * For Aurora cache in no outer mode, enable via the CP15 coprocessor
  * broadcasting of cache commands to L2.
@@ -1401,7 +1423,7 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
 		.flush_all = l2x0_flush_all,
 		.disable = l2x0_disable,
 		.sync = l2x0_cache_sync,
-		.resume = aurora_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -1414,7 +1436,7 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
 	.fixup = aurora_fixup,
 	.save = aurora_save,
 	.outer_cache = {
-		.resume = aurora_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -1562,6 +1584,7 @@ static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
 	.of_parse = l2c310_of_parse,
 	.enable = l2c310_enable,
 	.save = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range = bcm_inv_range,
 		.clean_range = bcm_clean_range,
@@ -1583,18 +1606,12 @@ static void __init tauros3_save(void __iomem *base)
 		readl_relaxed(base + L310_PREFETCH_CTRL);
 }
 
-static void tauros3_resume(void)
+static void tauros3_configure(void __iomem *base)
 {
-	void __iomem *base = l2x0_base;
-
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		writel_relaxed(l2x0_saved_regs.aux2_ctrl,
-			       base + TAUROS3_AUX2_CTRL);
-		writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
-			       base + L310_PREFETCH_CTRL);
-
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
-	}
+	writel_relaxed(l2x0_saved_regs.aux2_ctrl,
+		       base + TAUROS3_AUX2_CTRL);
+	writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
+		       base + L310_PREFETCH_CTRL);
 }
 
 static const struct l2c_init_data of_tauros3_data __initconst = {
@@ -1603,9 +1620,10 @@ static const struct l2c_init_data of_tauros3_data __initconst = {
 	.num_lock = 8,
 	.enable = l2c_enable,
 	.save = tauros3_save,
+	.configure = tauros3_configure,
 	/* Tauros3 broadcasts L1 cache operations to L2 */
 	.outer_cache = {
-		.resume = tauros3_resume,
+		.resume = l2c_resume,
 	},
 };
@@ -1661,6 +1679,10 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	if (!of_property_read_bool(np, "cache-unified"))
 		pr_err("L2C: device tree omits to specify unified cache\n");
 
+	/* Read back current (default) hardware configuration */
+	if (data->save)
+		data->save(l2x0_base);
+
 	/* L2 configuration can only be changed if the cache is disabled */
 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
 		if (data->of_parse)
@@ -1671,8 +1693,6 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	else
 		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
 
-	__l2c_init(data, aux_val, aux_mask, cache_id);
-
-	return 0;
+	return __l2c_init(data, aux_val, aux_mask, cache_id);
 }
 #endif
--
cgit v1.2.3

From c6d1a2d0078a30eb6290428a858c4e790a0e8691 Mon Sep 17 00:00:00 2001
From: Tomasz Figa
Date: Thu, 8 Jan 2015 07:51:07 +0100
Subject: ARM: 8260/1: l2c: Add interface to ask hypervisor to configure L2C

Because certain secure hypervisors do not allow writes to individual
L2C registers, but rather expect a set of parameters to be passed as
arguments to secure monitor calls, there is a need to provide an
interface for the L2C driver to ask the firmware to configure the
hardware according to the specified parameters. This patch adds such
an interface.
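
As an illustration only (this is not part of the patch; the
exynos_smc() helper and SMC command names are assumptions modelled on
the Exynos firmware interface), a platform could implement the new
hook along these lines:

	/* Hypothetical .configure callback: hand the whole staged
	 * register set to the secure monitor in one call instead of
	 * writing the registers directly. */
	static void exynos_l2c_configure(const struct l2x0_regs *regs)
	{
		exynos_smc(SMC_CMD_L2X0SETUP1, regs->tag_latency,
			   regs->data_latency, regs->prefetch_ctrl);
		exynos_smc(SMC_CMD_L2X0SETUP2, regs->pwr_ctrl,
			   regs->aux_ctrl, 0);
	}

	/* installed by platform code before the cache is initialised */
	outer_cache.configure = exynos_l2c_configure;

The driver then invokes outer_cache.configure(&l2x0_saved_regs) from
l2c_configure(), as the hunk below shows.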

Signed-off-by: Tomasz Figa
Signed-off-by: Marek Szyprowski
Tested-by: Nishanth Menon
Acked-by: Nishanth Menon
Acked-by: Tony Lindgren
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index dde0d54ac41e..5288153f28b8 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -110,6 +110,11 @@ static inline void l2c_unlock(void __iomem *base, unsigned num)
 
 static void l2c_configure(void __iomem *base)
 {
+	if (outer_cache.configure) {
+		outer_cache.configure(&l2x0_saved_regs);
+		return;
+	}
+
 	if (l2x0_data->configure)
 		l2x0_data->configure(base);
 
@@ -910,6 +915,7 @@ static int __init __l2c_init(const struct l2c_init_data *data,
 
 	fns = data->outer_cache;
 	fns.write_sec = outer_cache.write_sec;
+	fns.configure = outer_cache.configure;
 	if (data->fixup)
 		data->fixup(l2x0_base, cache_id, &fns);
 
--
cgit v1.2.3

From cf0681ca4cc5c9faa85a2df4c063b926fc09c977 Mon Sep 17 00:00:00 2001
From: Tomasz Figa
Date: Thu, 8 Jan 2015 07:52:38 +0100
Subject: ARM: 8262/1: l2c: Add support for overriding prefetch settings

Firmware on certain boards (e.g. ODROID-U3) can leave incorrect L2C
prefetch settings configured in its registers, leading to crashes if
L2C is enabled without overriding them. This patch introduces bindings
that allow the prefetch settings to be specified from DT, along with
the necessary support in the driver.

[mszyprow: rebased onto v3.18-rc1, added error message when prefetch
 related dt property has been provided without any value]

Signed-off-by: Tomasz Figa
Signed-off-by: Marek Szyprowski
Tested-by: Nishanth Menon
Acked-by: Nishanth Menon
Acked-by: Tony Lindgren
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 5288153f28b8..01de13809454 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1169,6 +1169,8 @@ static void __init l2c310_of_parse(const struct device_node *np,
 	u32 tag[3] = { 0, 0, 0 };
 	u32 filter[2] = { 0, 0 };
 	u32 assoc;
+	u32 prefetch;
+	u32 val;
 	int ret;
 
 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
@@ -1214,6 +1216,58 @@ static void __init l2c310_of_parse(const struct device_node *np,
 			assoc);
 		break;
 	}
+
+	prefetch = l2x0_saved_regs.prefetch_ctrl;
+
+	ret = of_property_read_u32(np, "arm,double-linefill", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,double-linefill-incr", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill-incr property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,double-linefill-wrap", &val);
+	if (ret == 0) {
+		if (!val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill-wrap property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,prefetch-drop", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_PREFETCH_DROP;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_PREFETCH_DROP;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,prefetch-drop property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,prefetch-offset", &val);
+	if (ret == 0) {
+		prefetch &= ~L310_PREFETCH_CTRL_OFFSET_MASK;
+		prefetch |= val & L310_PREFETCH_CTRL_OFFSET_MASK;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n");
+	}
+
+	l2x0_saved_regs.prefetch_ctrl = prefetch;
 }
 
 static const struct l2c_init_data of_l2c310_data __initconst = {
--
cgit v1.2.3

From 99a468d779f6851a31053e6a33bf91391034010b Mon Sep 17 00:00:00 2001
From: "George G. Davis"
Date: Fri, 16 Jan 2015 11:21:05 +0100
Subject: ARM: 8286/1: mm: Fix dma_contiguous_reserve comment

DMA contiguous allocations have been able to use highmem since commit
"95b0e65 ARM: mm: don't limit default CMA region only to low memory",
but a comment still notes the earlier "low memory" limitation. Update
the comment to remove the low memory limitation and fix the
s/contigouos/contiguous/ typo while we're at it.

Signed-off-by: George G. Davis
Acked-by: Marek Szyprowski
Signed-off-by: Russell King
---
 arch/arm/mm/init.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 98ad9c79ea0e..d538dc73fcb0 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -319,10 +319,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
 
 	early_init_fdt_scan_reserved_mem();
 
-	/*
-	 * reserve memory for DMA contigouos allocations,
-	 * must come from DMA area inside low memory
-	 */
+	/* reserve memory for DMA contiguous allocations */
 	dma_contiguous_reserve(arm_dma_limit);
 
 	arm_memblock_steal_permitted = false;
--
cgit v1.2.3

From c2273a185354fe9420fb342b1ca09a6fed857fb3 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Fri, 16 Jan 2015 18:01:43 +0100
Subject: ARM: 8288/1: dma-mapping: don't detach devices without an IOMMU during teardown

When tearing down the DMA ops for a device via of_dma_deconfigure, we
unconditionally detach the device from its IOMMU domain. For devices
that aren't actually behind an IOMMU, this produces a "Not attached"
warning message on the console.

This patch changes the teardown code so that we don't detach from the
IOMMU domain when there isn't an IOMMU dma mapping to start with.

Reported-by: Laurent Pinchart
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/dma-mapping.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 7864797609b3..f142ddd6c40a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2025,6 +2025,9 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
 {
 	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
 
+	if (!mapping)
+		return;
+
 	arm_iommu_detach_device(dev);
 	arm_iommu_release_mapping(mapping);
 }
--
cgit v1.2.3

From fba289054f24d2550f47a1413e1ccc24f4165560 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 28 Jan 2015 17:58:33 +0100
Subject: ARM: 8298/1: ARM_KERNMEM_PERMS only works with MMU enabled

The recently added ARM_KERNMEM_PERMS feature works by manipulating the
kernel page tables, which obviously requires an MMU. Trying to enable
this feature when the MMU is disabled results in a lot of compile
errors in mm/init.c, so let's add a Kconfig dependency to avoid that
case.

Signed-off-by: Arnd Bergmann
Signed-off-by: Russell King
---
 arch/arm/mm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 03823e784f63..c43c71455566 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -1012,6 +1012,7 @@ config ARCH_SUPPORTS_BIG_ENDIAN
 
 config ARM_KERNMEM_PERMS
 	bool "Restrict kernel memory permissions"
+	depends on MMU
 	help
 	  If this is set, kernel memory other than kernel text (and rodata)
 	  will be made non-executable. The tradeoff is that each region is
--
cgit v1.2.3

From 8e64806672466392acf19e14427d1c29df3e58b9 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 29 Jan 2015 16:41:46 +0100
Subject: ARM: 8299/1: mm: ensure local active ASID is marked as allocated on rollover

Commit e1a5848e3398 ("ARM: 7924/1: mm: don't bother with reserved
ttbr0 when running with LPAE") removed the use of the reserved TTBR0
value for LPAE systems, since the ASID is held in the TTBR and can be
updated atomically with the pgd of the next mm.

Unfortunately, this patch forgot to update flush_context, which
deliberately avoids marking the local active ASID as allocated, since
we used to switch via ASID zero and didn't need to allocate the ASID
of the previous mm.

The side-effect of this is that we can allocate the same ASID to the
next mm and, between flushing the local TLB and updating TTBR0, we can
perform speculative TLB fills for userspace nG mappings using the page
table of the previous mm.

The consequence of this is that the next mm can erroneously hit some
mappings of the previous mm. Note that this was made significantly
harder to hit by a391263cd84e ("ARM: 8203/1: mm: try to re-use old
ASID assignments following a rollover") but is still theoretically
possible.

This patch fixes the problem by removing the code from flush_context
that forces the allocated ASID to zero for the local CPU. Many thanks
to the Broadcom guys for tracking this one down.

Fixes: e1a5848e3398 ("ARM: 7924/1: mm: don't bother with reserved ttbr0 when running with LPAE")
Cc: # v3.14+
Reported-by: Raymond Ngun
Tested-by: Raymond Ngun
Reviewed-by: Gregory Fong
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/context.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 91892569710f..845769e41332 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -144,21 +144,17 @@ static void flush_context(unsigned int cpu)
 	/* Update the list of reserved ASIDs and the ASID bitmap. */
 	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 	for_each_possible_cpu(i) {
-		if (i == cpu) {
-			asid = 0;
-		} else {
-			asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
-			/*
-			 * If this CPU has already been through a
-			 * rollover, but hasn't run another task in
-			 * the meantime, we must preserve its reserved
-			 * ASID, as this is the only trace we have of
-			 * the process it is still running.
-			 */
-			if (asid == 0)
-				asid = per_cpu(reserved_asids, i);
-			__set_bit(asid & ~ASID_MASK, asid_map);
-		}
+		asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
+		/*
+		 * If this CPU has already been through a
+		 * rollover, but hasn't run another task in
+		 * the meantime, we must preserve its reserved
+		 * ASID, as this is the only trace we have of
+		 * the process it is still running.
+		 */
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, i);
+		__set_bit(asid & ~ASID_MASK, asid_map);
 		per_cpu(reserved_asids, i) = asid;
 	}
--
cgit v1.2.3

From 20e783e39e55c2615fb61d1b3d139ee9edcf6772 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 28 Jan 2015 17:54:38 +0100
Subject: ARM: 8296/1: cache-l2x0: clean up aurora cache handling

The aurora cache controller is the only remaining user of a couple of
functions in this file, which are completely unused when that
controller is disabled, leading to build warnings:

arch/arm/mm/cache-l2x0.c:167:13: warning: 'l2x0_cache_sync' defined but not used [-Wunused-function]
arch/arm/mm/cache-l2x0.c:184:13: warning: 'l2x0_flush_all' defined but not used [-Wunused-function]
arch/arm/mm/cache-l2x0.c:194:13: warning: 'l2x0_disable' defined but not used [-Wunused-function]

With the knowledge that the code is now aurora-specific, we can
simplify it noticeably:

- The pl310 errata workarounds are not needed on aurora and can be
  removed.
- As confirmed by Thomas Petazzoni from the data sheet, the
  cache_wait() macro is never needed.
- There is no need to hold the lock across the atomic cache sync.
- We can load l2x0_base into a local variable across operations.

There should be no functional change in this patch, but readability
and the generated object code improve, along with avoiding the
warnings.

(on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of
DMA traffic by reading data from a SD card)

Acked-by: Thomas Petazzoni
Tested-by: Thomas Petazzoni
Signed-off-by: Arnd Bergmann
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 111 ++++++++++++++++------------------------------
 1 file changed, 38 insertions(+), 73 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 01de13809454..404c598da27d 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -156,73 +156,6 @@ static void l2c_disable(void)
 	dsb(st);
 }
 
-#ifdef CONFIG_CACHE_PL310
-static inline void cache_wait(void __iomem *reg, unsigned long mask)
-{
-	/* cache operations by line are atomic on PL310 */
-}
-#else
-#define cache_wait	l2c_wait_mask
-#endif
-
-static inline void cache_sync(void)
-{
-	void __iomem *base = l2x0_base;
-
-	writel_relaxed(0, base + sync_reg_offset);
-	cache_wait(base + L2X0_CACHE_SYNC, 1);
-}
-
-#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
-static inline void debug_writel(unsigned long val)
-{
-	l2c_set_debug(l2x0_base, val);
-}
-#else
-/* Optimised out for non-errata case */
-static inline void debug_writel(unsigned long val)
-{
-}
-#endif
-
-static void l2x0_cache_sync(void)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	cache_sync();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void __l2x0_flush_all(void)
-{
-	debug_writel(0x03);
-	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
-	cache_sync();
-	debug_writel(0x00);
-}
-
-static void l2x0_flush_all(void)
-{
-	unsigned long flags;
-
-	/* clean all ways */
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	__l2x0_flush_all();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void l2x0_disable(void)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	__l2x0_flush_all();
-	l2c_write_sec(0, l2x0_base, L2X0_CTRL);
-	dsb(st);
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
 static void l2c_save(void __iomem *base)
 {
 	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
@@ -1349,14 +1282,15 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
 static void aurora_pa_range(unsigned long start, unsigned long end,
 			    unsigned long offset)
 {
+	void __iomem *base = l2x0_base;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
-	writel_relaxed(end, l2x0_base + offset);
+	writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
+	writel_relaxed(end, base + offset);
 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 
-	cache_sync();
+	writel_relaxed(0, base + AURORA_SYNC_REG);
 }
 
 static void aurora_inv_range(unsigned long start, unsigned long end)
@@ -1416,6 +1350,37 @@ static void aurora_flush_range(unsigned long start, unsigned long end)
 	}
 }
 
+static void aurora_flush_all(void)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	/* clean all ways */
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+	writel_relaxed(0, base + AURORA_SYNC_REG);
+}
+
+static void aurora_cache_sync(void)
+{
+	writel_relaxed(0, l2x0_base + AURORA_SYNC_REG);
+}
+
+static void aurora_disable(void)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	writel_relaxed(0, base + AURORA_SYNC_REG);
+	l2c_write_sec(0, base, L2X0_CTRL);
+	dsb(st);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
 static void aurora_save(void __iomem *base)
 {
 	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
@@ -1480,9 +1445,9 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
 		.inv_range = aurora_inv_range,
 		.clean_range = aurora_clean_range,
 		.flush_range = aurora_flush_range,
-		.flush_all = l2x0_flush_all,
-		.disable = l2x0_disable,
-		.sync = l2x0_cache_sync,
+		.flush_all = aurora_flush_all,
+		.disable = aurora_disable,
+		.sync = aurora_cache_sync,
 		.resume = l2c_resume,
 	},
 };
--
cgit v1.2.3

From 1d88967900b87f94435581dad4ae319686c6ce10 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 28 Jan 2015 17:55:31 +0100
Subject: ARM: 8297/1: cache-l2x0: optimize aurora range operations

The aurora_inv_range(), aurora_clean_range() and aurora_flush_range()
functions are highly redundant, both in source and in object code, and
they are harder to understand than necessary.

By moving the range loop into the aurora_pa_range() function, they
become trivial wrappers, and the object code starts looking like what
one would expect for an optimal implementation.

Further optimization may be possible by using the per-CPU "virtual"
registers to avoid the spinlocks in most cases.

(on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of
DMA traffic by reading data from a SD card)

Reviewed-by: Thomas Petazzoni
Tested-by: Thomas Petazzoni
Signed-off-by: Arnd Bergmann
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 68 ++++++++++++++++--------------------------------
 1 file changed, 22 insertions(+), 46 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 404c598da27d..5ea2d6d417f7 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1256,7 +1256,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
  * noninclusive, while the hardware cache range operations use
  * inclusive start and end addresses.
  */
-static unsigned long calc_range_end(unsigned long start, unsigned long end)
+static unsigned long aurora_range_end(unsigned long start, unsigned long end)
 {
 	/*
 	 * Limit the number of cache lines processed at once,
@@ -1275,26 +1275,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
 	return end;
 }
 
-/*
- * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
- * and range operations only do a TLB lookup on the start address.
- */
 static void aurora_pa_range(unsigned long start, unsigned long end,
-	unsigned long offset)
+			    unsigned long offset)
 {
 	void __iomem *base = l2x0_base;
+	unsigned long range_end;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
-	writel_relaxed(end, base + offset);
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-
-	writel_relaxed(0, base + AURORA_SYNC_REG);
-}
-
-static void aurora_inv_range(unsigned long start, unsigned long end)
-{
 	/*
 	 * round start and end adresses up to cache line size
 	 */
@@ -1302,15 +1289,24 @@ static void aurora_inv_range(unsigned long start, unsigned long end)
 	end = ALIGN(end, CACHE_LINE_SIZE);
 
 	/*
-	 * Invalidate all full cache lines between 'start' and 'end'.
+	 * perform operation on all full cache lines between 'start' and 'end'
 	 */
 	while (start < end) {
-		unsigned long range_end = calc_range_end(start, end);
-		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-				AURORA_INVAL_RANGE_REG);
+		range_end = aurora_range_end(start, end);
+
+		raw_spin_lock_irqsave(&l2x0_lock, flags);
+		writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
+		writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
+		raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+		writel_relaxed(0, base + AURORA_SYNC_REG);
 		start = range_end;
 	}
 }
+static void aurora_inv_range(unsigned long start, unsigned long end)
+{
+	aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+}
 
 static void aurora_clean_range(unsigned long start, unsigned long end)
 {
@@ -1318,36 +1314,16 @@ static void aurora_clean_range(unsigned long start, unsigned long end)
 	 * If L2 is forced to WT, the L2 will always be clean and we
 	 * don't need to do anything here.
 	 */
-	if (!l2_wt_override) {
-		start &= ~(CACHE_LINE_SIZE - 1);
-		end = ALIGN(end, CACHE_LINE_SIZE);
-		while (start != end) {
-			unsigned long range_end = calc_range_end(start, end);
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_CLEAN_RANGE_REG);
-			start = range_end;
-		}
-	}
+	if (!l2_wt_override)
+		aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
 }
 
 static void aurora_flush_range(unsigned long start, unsigned long end)
 {
-	start &= ~(CACHE_LINE_SIZE - 1);
-	end = ALIGN(end, CACHE_LINE_SIZE);
-	while (start != end) {
-		unsigned long range_end = calc_range_end(start, end);
-		/*
-		 * If L2 is forced to WT, the L2 will always be clean and we
-		 * just need to invalidate.
-		 */
-		if (l2_wt_override)
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_INVAL_RANGE_REG);
-		else
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_FLUSH_RANGE_REG);
-		start = range_end;
-	}
+	if (l2_wt_override)
+		aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+	else
+		aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
 }
 
 static void aurora_flush_all(void)
--
cgit v1.2.3