From cca547e9aa3a6d561fe65e75a4bb2c18d80c541a Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 17 Dec 2014 17:57:38 +0100
Subject: ARM: 8249/1: mm: dump: don't skip regions

Currently the arm page table dumping code starts dumping page tables
from USER_PGTABLES_CEILING. This is unnecessary for skipping any
entries related to userspace, as swapper_pg_dir does not contain such
entries, and it results in a couple of unfortunate side effects.

Firstly, any kernel mappings which might exist below
USER_PGTABLES_CEILING will not be accounted in the dump output. This
masks any entries erroneously created below this address.

Secondly, if the final page table entry walked is part of a valid
mapping, the page table dumping code will not log the region this entry
is part of, as the final note_page call in walk_pgd will trigger an
early return when 0 < USER_PGTABLES_CEILING. Luckily this isn't seen on
contemporary systems as they typically don't have enough RAM to extend
the linear mapping right to the end of the address space.

Due to the way addr is constructed in the walk_* functions, it can
never be less than USER_PGTABLES_CEILING when walking the page tables,
so it is not necessary to avoid dereferencing invalid table addresses.
The existing checks for st->current_prot and st->marker[1].start_address
are sufficient to ensure we will not print and/or dereference garbage
when trying to log information.

This patch removes both problematic uses of USER_PGTABLES_CEILING from
the arm page table dumping code, preventing both of these issues. We
will now report any low mappings, and the final note_page call will not
return early, ensuring all regions are logged.

Signed-off-by: Mark Rutland
Cc: Steve Capper
Cc: Kees Cook
Cc: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/dump.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index 59424937e52b..9fe8e241335c 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -220,9 +220,6 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u
 	static const char units[] = "KMGTPE";
 	u64 prot = val & pg_level[level].mask;
 
-	if (addr < USER_PGTABLES_CEILING)
-		return;
-
 	if (!st->level) {
 		st->level = level;
 		st->current_prot = prot;
@@ -308,15 +305,13 @@ static void walk_pgd(struct seq_file *m)
 	pgd_t *pgd = swapper_pg_dir;
 	struct pg_state st;
 	unsigned long addr;
-	unsigned i, pgdoff = USER_PGTABLES_CEILING / PGDIR_SIZE;
+	unsigned i;
 
 	memset(&st, 0, sizeof(st));
 	st.seq = m;
 	st.marker = address_markers;
 
-	pgd += pgdoff;
-
-	for (i = pgdoff; i < PTRS_PER_PGD; i++, pgd++) {
+	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
 		addr = i * PGDIR_SIZE;
 		if (!pgd_none(*pgd)) {
 			walk_pud(&st, pgd, addr);
--
cgit v1.2.3

From ac08468867e99bc02b22baf4e58bc3537e9d852c Mon Sep 17 00:00:00 2001
From: Grygorii Strashko
Date: Tue, 23 Dec 2014 19:36:55 +0100
Subject: ARM: 8253/1: mm: use phys_addr_t type in map_lowmem() for kernel mem region

Currently the local variables kernel_x_start and kernel_x_end are
defined using the 'unsigned long' type, which is wrong because they
represent a physical memory range and will be calculated incorrectly
if LPAE is enabled. As a result, all following code in map_lowmem()
will not work correctly.

For example, Keystone 2 boot is broken because
  kernel_x_start == 0x0000 0000
  kernel_x_end   == 0x0080 0000

instead of
  kernel_x_start == 0x0000 0008 0000 0000
  kernel_x_end   == 0x0000 0008 0080 0000

and, as a result, the whole of low memory is mapped with MT_MEMORY_RW
permissions by this code (start >= kernel_x_end):

		} else if (start >= kernel_x_end) {
			map.pfn = __phys_to_pfn(start);
			map.virtual = __phys_to_virt(start);
			map.length = end - start;
			map.type = MT_MEMORY_RW;

			create_mapping(&map);
		}

Hence, fix it by using the phys_addr_t type for the variables
kernel_x_start and kernel_x_end.

Tested-by: Murali Karicheri
Signed-off-by: Grygorii Strashko
Signed-off-by: Russell King
---
 arch/arm/mm/mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index cda7c40999b6..4e6ef896c619 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1329,8 +1329,8 @@ static void __init kmap_init(void)
 static void __init map_lowmem(void)
 {
 	struct memblock_region *reg;
-	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
-	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+	phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
+	phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
 
 	/* Map all the lowmem memory banks. */
 	for_each_memblock(memory, reg) {
--
cgit v1.2.3

From 1e3479225acbb7ae048ac30fb7c6090fa7f0df02 Mon Sep 17 00:00:00 2001
From: Victor Kamensky
Date: Fri, 9 Jan 2015 18:55:45 +0100
Subject: ARM: 8275/1: mm: fix PMD_SECT_RDONLY undeclared compile error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the v3.19-rc3 tree, when CONFIG_ARM_LPAE and CONFIG_DEBUG_RODATA are
enabled, the image fails to compile with the following error:

arch/arm/mm/init.c:661:14: error: ‘PMD_SECT_RDONLY’ undeclared here (not in a function)

It seems that the '80d6b0c ARM: mm: allow text and rodata sections to
be read-only' and 'ded9477 ARM: 8109/1: mm: Modify pte_write and
pmd_write logic for LPAE' commits crossed. 80d6b0c uses the
PMD_SECT_RDONLY macro, but ded9477 renames it and uses the software
bits L_PMD_SECT_RDONLY instead.

The fix is to use L_PMD_SECT_RDONLY instead of PMD_SECT_RDONLY, as
ded9477 does in other places.

Signed-off-by: Victor Kamensky
Acked-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 98ad9c79ea0e..2495c8cb47ba 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -658,8 +658,8 @@ static struct section_perm ro_perms[] = {
 		.start = (unsigned long)_stext,
 		.end = (unsigned long)__init_begin,
 #ifdef CONFIG_ARM_LPAE
-		.mask = ~PMD_SECT_RDONLY,
-		.prot = PMD_SECT_RDONLY,
+		.mask = ~L_PMD_SECT_RDONLY,
+		.prot = L_PMD_SECT_RDONLY,
 #else
 		.mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE),
 		.prot = PMD_SECT_APX | PMD_SECT_AP_WRITE,
--
cgit v1.2.3

From 00218241aa0846e75d31b1dbadb5f8a76be1cc97 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski
Date: Thu, 8 Jan 2015 07:49:41 +0100
Subject: ARM: 8258/1: l2c: use l2c_write_sec() for restoring latency and filter regs

All four registers for the latency and filter settings cannot be
written in non-secure mode, so they should go through l2c_write_sec().
More on this can be found in the CoreLink Level 2 Cache Controller
L2C-310 Technical Reference Manual, 3.2. Register summary, table 3.1.
This has been checked against the TRM for r3p3, but it should be
uniform across all revisions.
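
For reference, the helper that these writes go through is a small
dispatcher in cache-l2x0.c. Roughly (a simplified sketch, not
necessarily the exact tree contents):

	static void l2c_write_sec(unsigned long val, void __iomem *base,
				  unsigned reg)
	{
		if (outer_cache.write_sec)
			outer_cache.write_sec(val, reg);
		else
			writel_relaxed(val, base + reg);
	}

On platforms where these registers are secure-only, the .write_sec()
hook typically wraps a call into the secure monitor; everywhere else
it degenerates to a plain writel_relaxed().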

Reported-by: Nishanth Menon
Suggested-by: Tomasz Figa
Signed-off-by: Marek Szyprowski
Tested-by: Nishanth Menon
Acked-by: Nishanth Menon
Acked-by: Tony Lindgren
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 5e65ca8dea62..b83c401ca50c 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -623,14 +623,14 @@ static void l2c310_resume(void)
 		unsigned revision;
 
 		/* restore pl310 setup */
-		writel_relaxed(l2x0_saved_regs.tag_latency,
-			       base + L310_TAG_LATENCY_CTRL);
-		writel_relaxed(l2x0_saved_regs.data_latency,
-			       base + L310_DATA_LATENCY_CTRL);
-		writel_relaxed(l2x0_saved_regs.filter_end,
-			       base + L310_ADDR_FILTER_END);
-		writel_relaxed(l2x0_saved_regs.filter_start,
-			       base + L310_ADDR_FILTER_START);
+		l2c_write_sec(l2x0_saved_regs.tag_latency, base,
+			      L310_TAG_LATENCY_CTRL);
+		l2c_write_sec(l2x0_saved_regs.data_latency, base,
+			      L310_DATA_LATENCY_CTRL);
+		l2c_write_sec(l2x0_saved_regs.filter_end, base,
+			      L310_ADDR_FILTER_END);
+		l2c_write_sec(l2x0_saved_regs.filter_start, base,
+			      L310_ADDR_FILTER_START);
 
 		revision = readl_relaxed(base + L2X0_CACHE_ID) &
 			L2X0_CACHE_ID_RTL_MASK;
@@ -1135,28 +1135,28 @@ static void __init l2c310_of_parse(const struct device_node *np,
 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
 	if (tag[0] && tag[1] && tag[2])
-		writel_relaxed(
+		l2c_write_sec(
 			L310_LATENCY_CTRL_RD(tag[0] - 1) |
 			L310_LATENCY_CTRL_WR(tag[1] - 1) |
 			L310_LATENCY_CTRL_SETUP(tag[2] - 1),
-			l2x0_base + L310_TAG_LATENCY_CTRL);
+			l2x0_base, L310_TAG_LATENCY_CTRL);
 
 	of_property_read_u32_array(np, "arm,data-latency",
 				   data, ARRAY_SIZE(data));
 	if (data[0] && data[1] && data[2])
-		writel_relaxed(
+		l2c_write_sec(
 			L310_LATENCY_CTRL_RD(data[0] - 1) |
 			L310_LATENCY_CTRL_WR(data[1] - 1) |
 			L310_LATENCY_CTRL_SETUP(data[2] - 1),
-			l2x0_base + L310_DATA_LATENCY_CTRL);
+			l2x0_base, L310_DATA_LATENCY_CTRL);
 
 	of_property_read_u32_array(np, "arm,filter-ranges",
 				   filter, ARRAY_SIZE(filter));
 	if (filter[1]) {
-		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
-			       l2x0_base + L310_ADDR_FILTER_END);
-		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
-			       l2x0_base + L310_ADDR_FILTER_START);
+		l2c_write_sec(ALIGN(filter[0] + filter[1], SZ_1M),
+			      l2x0_base, L310_ADDR_FILTER_END);
+		l2c_write_sec((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
+			      l2x0_base, L310_ADDR_FILTER_START);
 	}
 
 	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
--
cgit v1.2.3

From 6b49241ac2525818508ee2baff9a58541c65421c Mon Sep 17 00:00:00 2001
From: Tomasz Figa
Date: Thu, 8 Jan 2015 07:50:29 +0100
Subject: ARM: 8259/1: l2c: Refactor the driver to use commit-like interface

Certain implementations of secure hypervisors (namely the one found on
Samsung Exynos-based boards) do not provide access to individual L2C
registers. This makes the .write_sec()-based interface insufficient
and provokes ugly hacks.

This patch is the first step towards making the driver not rely on the
availability of writes to individual registers. This is achieved by
refactoring the driver to use a commit-like operation scheme: all
register values are prepared first and stored in an instance of the
l2x0_regs struct, and then a single callback is responsible for
flushing those values to the hardware.
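
For reference, the l2x0_regs staging structure mentioned above lives in
arch/arm/include/asm/hardware/cache-l2x0.h. An abridged sketch, listing
only the fields used by the hunks in this series (the real header may
carry more):

	struct l2x0_regs {
		unsigned long aux_ctrl;
		/* L310-specific */
		unsigned long tag_latency;
		unsigned long data_latency;
		unsigned long filter_start;
		unsigned long filter_end;
		unsigned long prefetch_ctrl;
		unsigned long pwr_ctrl;
		/* Aurora / Tauros3 specific */
		unsigned long ctrl;
		unsigned long aux2_ctrl;
	};

A single global instance, l2x0_saved_regs, is what the staging code
below fills in and what the new configure step flushes to the hardware.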

[mszyprow: rebased onto 'ARM: l2c: use l2c_write_sec() for restoring
 latency and filter regs' patch]

Signed-off-by: Tomasz Figa
Signed-off-by: Marek Szyprowski
Tested-by: Nishanth Menon
Acked-by: Tony Lindgren
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 212 ++++++++++++++++++++++++++---------------------
 1 file changed, 116 insertions(+), 96 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index b83c401ca50c..dde0d54ac41e 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -41,12 +41,14 @@ struct l2c_init_data {
 	void (*enable)(void __iomem *, u32, unsigned);
 	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
 	void (*save)(void __iomem *);
+	void (*configure)(void __iomem *);
 	struct outer_cache_fns outer_cache;
 };
 
 #define CACHE_LINE_SIZE 32
 
 static void __iomem *l2x0_base;
+static const struct l2c_init_data *l2x0_data;
 static DEFINE_RAW_SPINLOCK(l2x0_lock);
 static u32 l2x0_way_mask;	/* Bitmask of active ways */
 static u32 l2x0_size;
@@ -106,6 +108,14 @@ static inline void l2c_unlock(void __iomem *base, unsigned num)
 	}
 }
 
+static void l2c_configure(void __iomem *base)
+{
+	if (l2x0_data->configure)
+		l2x0_data->configure(base);
+
+	l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
+}
+
 /*
  * Enable the L2 cache controller. This function must only be
  * called when the cache controller is known to be disabled.
@@ -114,7 +124,12 @@ static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
 {
 	unsigned long flags;
 
-	l2c_write_sec(aux, base, L2X0_AUX_CTRL);
+	/* Do not touch the controller if already enabled. */
+	if (readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)
+		return;
+
+	l2x0_saved_regs.aux_ctrl = aux;
+	l2c_configure(base);
 
 	l2c_unlock(base, num_lock);
@@ -208,6 +223,11 @@ static void l2c_save(void __iomem *base)
 	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
 }
 
+static void l2c_resume(void)
+{
+	l2c_enable(l2x0_base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock);
+}
+
 /*
  * L2C-210 specific code.
 *
@@ -288,14 +308,6 @@ static void l2c210_sync(void)
 	__l2c210_cache_sync(l2x0_base);
 }
 
-static void l2c210_resume(void)
-{
-	void __iomem *base = l2x0_base;
-
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1);
-}
-
 static const struct l2c_init_data l2c210_data __initconst = {
 	.type = "L2C-210",
 	.way_size_0 = SZ_8K,
@@ -309,7 +321,7 @@ static const struct l2c_init_data l2c210_data __initconst = {
 		.flush_all = l2c210_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c210_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -466,7 +478,7 @@ static const struct l2c_init_data l2c220_data = {
 		.flush_all = l2c220_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c220_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -615,39 +627,29 @@ static void __init l2c310_save(void __iomem *base)
 			L310_POWER_CTRL);
 }
 
-static void l2c310_resume(void)
+static void l2c310_configure(void __iomem *base)
 {
-	void __iomem *base = l2x0_base;
+	unsigned revision;
 
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		unsigned revision;
-
-		/* restore pl310 setup */
-		l2c_write_sec(l2x0_saved_regs.tag_latency, base,
-			      L310_TAG_LATENCY_CTRL);
-		l2c_write_sec(l2x0_saved_regs.data_latency, base,
-			      L310_DATA_LATENCY_CTRL);
-		l2c_write_sec(l2x0_saved_regs.filter_end, base,
-			      L310_ADDR_FILTER_END);
-		l2c_write_sec(l2x0_saved_regs.filter_start, base,
-			      L310_ADDR_FILTER_START);
-
-		revision = readl_relaxed(base + L2X0_CACHE_ID) &
-			L2X0_CACHE_ID_RTL_MASK;
-
-		if (revision >= L310_CACHE_ID_RTL_R2P0)
-			l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
-				      L310_PREFETCH_CTRL);
-		if (revision >= L310_CACHE_ID_RTL_R3P0)
-			l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
-				      L310_POWER_CTRL);
-
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
-
-		/* Re-enable full-line-of-zeros for Cortex-A9 */
-		if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
-			set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
-	}
+	/* restore pl310 setup */
+	l2c_write_sec(l2x0_saved_regs.tag_latency, base,
+		      L310_TAG_LATENCY_CTRL);
+	l2c_write_sec(l2x0_saved_regs.data_latency, base,
+		      L310_DATA_LATENCY_CTRL);
+	l2c_write_sec(l2x0_saved_regs.filter_end, base,
+		      L310_ADDR_FILTER_END);
+	l2c_write_sec(l2x0_saved_regs.filter_start, base,
+		      L310_ADDR_FILTER_START);
+
+	revision = readl_relaxed(base + L2X0_CACHE_ID) &
+		L2X0_CACHE_ID_RTL_MASK;
+
+	if (revision >= L310_CACHE_ID_RTL_R2P0)
+		l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
+			      L310_PREFETCH_CTRL);
+	if (revision >= L310_CACHE_ID_RTL_R3P0)
+		l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
+			      L310_POWER_CTRL);
 }
 
 static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data)
@@ -699,6 +701,23 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
 		aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
 	}
 
+	/* r3p0 or later has power control register */
+	if (rev >= L310_CACHE_ID_RTL_R3P0)
+		l2x0_saved_regs.pwr_ctrl = L310_DYNAMIC_CLK_GATING_EN |
+					   L310_STNDBY_MODE_EN;
+
+	/*
+	 * Always enable non-secure access to the lockdown registers -
+	 * we write to them as part of the L2C enable sequence so they
+	 * need to be accessible.
+	 */
+	aux |= L310_AUX_CTRL_NS_LOCKDOWN;
+
+	l2c_enable(base, aux, num_lock);
+
+	/* Read back resulting AUX_CTRL value as it could have been altered. */
+	aux = readl_relaxed(base + L2X0_AUX_CTRL);
+
 	if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
 		u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);
 
@@ -712,23 +731,12 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
 	if (rev >= L310_CACHE_ID_RTL_R3P0) {
 		u32 power_ctrl;
 
-		l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN,
-			      base, L310_POWER_CTRL);
 		power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
 		pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
 			power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
 			power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
 	}
 
-	/*
-	 * Always enable non-secure access to the lockdown registers -
-	 * we write to them as part of the L2C enable sequence so they
-	 * need to be accessible.
-	 */
-	aux |= L310_AUX_CTRL_NS_LOCKDOWN;
-
-	l2c_enable(base, aux, num_lock);
-
 	if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) {
 		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
 		cpu_notifier(l2c310_cpu_enable_flz, 0);
@@ -760,11 +768,11 @@ static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
 
 	if (revision >= L310_CACHE_ID_RTL_R3P0 &&
 	    revision < L310_CACHE_ID_RTL_R3P2) {
-		u32 val = readl_relaxed(base + L310_PREFETCH_CTRL);
+		u32 val = l2x0_saved_regs.prefetch_ctrl;
 		/* I don't think bit23 is required here... but iMX6 does so */
 		if (val & (BIT(30) | BIT(23))) {
 			val &= ~(BIT(30) | BIT(23));
-			l2c_write_sec(val, base, L310_PREFETCH_CTRL);
+			l2x0_saved_regs.prefetch_ctrl = val;
 			errata[n++] = "752271";
 		}
 	}
@@ -800,6 +808,15 @@ static void l2c310_disable(void)
 	l2c_disable();
 }
 
+static void l2c310_resume(void)
+{
+	l2c_resume();
+
+	/* Re-enable full-line-of-zeros for Cortex-A9 */
+	if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+}
+
 static const struct l2c_init_data l2c310_init_fns __initconst = {
 	.type = "L2C-310",
 	.way_size_0 = SZ_8K,
@@ -807,6 +824,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -818,13 +836,21 @@ static const struct l2c_init_data l2c310_init_fns __initconst = {
 	},
 };
 
-static void __init __l2c_init(const struct l2c_init_data *data,
-	u32 aux_val, u32 aux_mask, u32 cache_id)
+static int __init __l2c_init(const struct l2c_init_data *data,
+			     u32 aux_val, u32 aux_mask, u32 cache_id)
 {
 	struct outer_cache_fns fns;
 	unsigned way_size_bits, ways;
 	u32 aux, old_aux;
 
+	/*
+	 * Save the pointer globally so that callbacks which do not receive
+	 * context from callers can access the structure.
+	 */
+	l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
+	if (!l2x0_data)
+		return -ENOMEM;
+
 	/*
	 * Sanity check the aux values. aux_mask is the bits we preserve
 	 * from reading the hardware register, and aux_val is the bits we
@@ -910,6 +936,8 @@ static void __init __l2c_init(const struct l2c_init_data *data,
 		data->type, ways, l2x0_size >> 10);
 	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
 		data->type, cache_id, aux);
+
+	return 0;
 }
 
 void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
@@ -936,6 +964,10 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 		break;
 	}
 
+	/* Read back current (default) hardware configuration */
+	if (data->save)
+		data->save(l2x0_base);
+
 	__l2c_init(data, aux_val, aux_mask, cache_id);
 }
 
@@ -1102,7 +1134,7 @@ static const struct l2c_init_data of_l2c210_data __initconst = {
 		.flush_all = l2c210_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c210_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -1120,7 +1152,7 @@ static const struct l2c_init_data of_l2c220_data __initconst = {
 		.flush_all = l2c220_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c220_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -1135,28 +1167,26 @@ static void __init l2c310_of_parse(const struct device_node *np,
 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
 	if (tag[0] && tag[1] && tag[2])
-		l2c_write_sec(
+		l2x0_saved_regs.tag_latency =
 			L310_LATENCY_CTRL_RD(tag[0] - 1) |
 			L310_LATENCY_CTRL_WR(tag[1] - 1) |
-			L310_LATENCY_CTRL_SETUP(tag[2] - 1),
-			l2x0_base, L310_TAG_LATENCY_CTRL);
+			L310_LATENCY_CTRL_SETUP(tag[2] - 1);
 
 	of_property_read_u32_array(np, "arm,data-latency",
 				   data, ARRAY_SIZE(data));
 	if (data[0] && data[1] && data[2])
-		l2c_write_sec(
+		l2x0_saved_regs.data_latency =
 			L310_LATENCY_CTRL_RD(data[0] - 1) |
 			L310_LATENCY_CTRL_WR(data[1] - 1) |
-			L310_LATENCY_CTRL_SETUP(data[2] - 1),
-			l2x0_base, L310_DATA_LATENCY_CTRL);
+			L310_LATENCY_CTRL_SETUP(data[2] - 1);
 
 	of_property_read_u32_array(np, "arm,filter-ranges",
 				   filter, ARRAY_SIZE(filter));
 	if (filter[1]) {
-		l2c_write_sec(ALIGN(filter[0] + filter[1], SZ_1M),
-			      l2x0_base, L310_ADDR_FILTER_END);
-		l2c_write_sec((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
-			      l2x0_base, L310_ADDR_FILTER_START);
+		l2x0_saved_regs.filter_end =
+			ALIGN(filter[0] + filter[1], SZ_1M);
+		l2x0_saved_regs.filter_start = (filter[0] & ~(SZ_1M - 1))
+			| L310_ADDR_FILTER_EN;
 	}
 
 	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
@@ -1188,6 +1218,7 @@ static const struct l2c_init_data of_l2c310_data __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1216,6 +1247,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1330,16 +1362,6 @@ static void aurora_save(void __iomem *base)
 	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
 }
 
-static void aurora_resume(void)
-{
-	void __iomem *base = l2x0_base;
-
-	if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
-		writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
-	}
-}
-
 /*
  * For Aurora cache in no outer mode, enable via the CP15 coprocessor
  * broadcasting of cache commands to L2.
@@ -1401,7 +1423,7 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
 		.flush_all = l2x0_flush_all,
 		.disable = l2x0_disable,
 		.sync = l2x0_cache_sync,
-		.resume = aurora_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -1414,7 +1436,7 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
 	.fixup = aurora_fixup,
 	.save = aurora_save,
 	.outer_cache = {
-		.resume = aurora_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -1562,6 +1584,7 @@ static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
 	.of_parse = l2c310_of_parse,
 	.enable = l2c310_enable,
 	.save = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range = bcm_inv_range,
 		.clean_range = bcm_clean_range,
@@ -1583,18 +1606,12 @@ static void __init tauros3_save(void __iomem *base)
 		readl_relaxed(base + L310_PREFETCH_CTRL);
 }
 
-static void tauros3_resume(void)
+static void tauros3_configure(void __iomem *base)
 {
-	void __iomem *base = l2x0_base;
-
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		writel_relaxed(l2x0_saved_regs.aux2_ctrl,
-			       base + TAUROS3_AUX2_CTRL);
-		writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
-			       base + L310_PREFETCH_CTRL);
-
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
-	}
+	writel_relaxed(l2x0_saved_regs.aux2_ctrl,
+		       base + TAUROS3_AUX2_CTRL);
+	writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
+		       base + L310_PREFETCH_CTRL);
 }
 
 static const struct l2c_init_data of_tauros3_data __initconst = {
@@ -1603,9 +1620,10 @@ static const struct l2c_init_data of_tauros3_data __initconst = {
 	.num_lock = 8,
 	.enable = l2c_enable,
 	.save = tauros3_save,
+	.configure = tauros3_configure,
 	/* Tauros3 broadcasts L1 cache operations to L2 */
 	.outer_cache = {
-		.resume = tauros3_resume,
+		.resume = l2c_resume,
 	},
 };
@@ -1661,6 +1679,10 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	if (!of_property_read_bool(np, "cache-unified"))
 		pr_err("L2C: device tree omits to specify unified cache\n");
 
+	/* Read back current (default) hardware configuration */
+	if (data->save)
+		data->save(l2x0_base);
+
 	/* L2 configuration can only be changed if the cache is disabled */
 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
 		if (data->of_parse)
@@ -1671,8 +1693,6 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	else
 		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
 
-	__l2c_init(data, aux_val, aux_mask, cache_id);
-
-	return 0;
+	return __l2c_init(data, aux_val, aux_mask, cache_id);
 }
 #endif
--
cgit v1.2.3

From c6d1a2d0078a30eb6290428a858c4e790a0e8691 Mon Sep 17 00:00:00 2001
From: Tomasz Figa
Date: Thu, 8 Jan 2015 07:51:07 +0100
Subject: ARM: 8260/1: l2c: Add interface to ask hypervisor to configure L2C

Because certain secure hypervisors do not allow writes to individual
L2C registers, but rather expect a set of parameters to be passed as
arguments to secure monitor calls, there is a need to provide an
interface for the L2C driver to ask the firmware to configure the
hardware according to the specified parameters. This patch adds such
an interface.
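
As an illustration only (this is not part of the patch; the
exynos_smc() helper and SMC command names are assumptions modelled on
the Exynos firmware interface), a platform could implement the new
hook along these lines:

	/* Hypothetical .configure callback: hand the whole staged
	 * register set to the secure monitor in one call instead of
	 * writing the registers directly. */
	static void exynos_l2c_configure(const struct l2x0_regs *regs)
	{
		exynos_smc(SMC_CMD_L2X0SETUP1, regs->tag_latency,
			   regs->data_latency, regs->prefetch_ctrl);
		exynos_smc(SMC_CMD_L2X0SETUP2, regs->pwr_ctrl,
			   regs->aux_ctrl, 0);
	}

	/* installed by platform code before the cache is initialised */
	outer_cache.configure = exynos_l2c_configure;

The driver then invokes outer_cache.configure(&l2x0_saved_regs) from
l2c_configure(), as the hunk below shows.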

Signed-off-by: Tomasz Figa
Signed-off-by: Marek Szyprowski
Tested-by: Nishanth Menon
Acked-by: Nishanth Menon
Acked-by: Tony Lindgren
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index dde0d54ac41e..5288153f28b8 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -110,6 +110,11 @@ static inline void l2c_unlock(void __iomem *base, unsigned num)
 
 static void l2c_configure(void __iomem *base)
 {
+	if (outer_cache.configure) {
+		outer_cache.configure(&l2x0_saved_regs);
+		return;
+	}
+
 	if (l2x0_data->configure)
 		l2x0_data->configure(base);
 
@@ -910,6 +915,7 @@ static int __init __l2c_init(const struct l2c_init_data *data,
 
 	fns = data->outer_cache;
 	fns.write_sec = outer_cache.write_sec;
+	fns.configure = outer_cache.configure;
 	if (data->fixup)
 		data->fixup(l2x0_base, cache_id, &fns);
 
--
cgit v1.2.3

From cf0681ca4cc5c9faa85a2df4c063b926fc09c977 Mon Sep 17 00:00:00 2001
From: Tomasz Figa
Date: Thu, 8 Jan 2015 07:52:38 +0100
Subject: ARM: 8262/1: l2c: Add support for overriding prefetch settings

Firmware on certain boards (e.g. ODROID-U3) can leave incorrect L2C
prefetch settings configured in its registers, leading to crashes if
L2C is enabled without overriding them. This patch introduces bindings
that allow the prefetch settings to be specified from DT, along with
the necessary support in the driver.

[mszyprow: rebased onto v3.18-rc1, added error message when prefetch
 related dt property has been provided without any value]

Signed-off-by: Tomasz Figa
Signed-off-by: Marek Szyprowski
Tested-by: Nishanth Menon
Acked-by: Nishanth Menon
Acked-by: Tony Lindgren
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 5288153f28b8..01de13809454 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1169,6 +1169,8 @@ static void __init l2c310_of_parse(const struct device_node *np,
 	u32 tag[3] = { 0, 0, 0 };
 	u32 filter[2] = { 0, 0 };
 	u32 assoc;
+	u32 prefetch;
+	u32 val;
 	int ret;
 
 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
@@ -1214,6 +1216,58 @@ static void __init l2c310_of_parse(const struct device_node *np,
 			assoc);
 		break;
 	}
+
+	prefetch = l2x0_saved_regs.prefetch_ctrl;
+
+	ret = of_property_read_u32(np, "arm,double-linefill", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,double-linefill-incr", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill-incr property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,double-linefill-wrap", &val);
+	if (ret == 0) {
+		if (!val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill-wrap property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,prefetch-drop", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_PREFETCH_DROP;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_PREFETCH_DROP;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,prefetch-drop property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,prefetch-offset", &val);
+	if (ret == 0) {
+		prefetch &= ~L310_PREFETCH_CTRL_OFFSET_MASK;
+		prefetch |= val & L310_PREFETCH_CTRL_OFFSET_MASK;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n");
+	}
+
+	l2x0_saved_regs.prefetch_ctrl = prefetch;
 }
 
 static const struct l2c_init_data of_l2c310_data __initconst = {
--
cgit v1.2.3

From 99a468d779f6851a31053e6a33bf91391034010b Mon Sep 17 00:00:00 2001
From: "George G. Davis"
Date: Fri, 16 Jan 2015 11:21:05 +0100
Subject: ARM: 8286/1: mm: Fix dma_contiguous_reserve comment

DMA contiguous allocations have been able to use highmem since commit
"95b0e65 ARM: mm: don't limit default CMA region only to low memory",
but a comment still notes the earlier "low memory" limitation. Update
the comment to remove the low memory limitation and fix the
s/contigouos/contiguous/ typo while we're at it.

Signed-off-by: George G. Davis
Acked-by: Marek Szyprowski
Signed-off-by: Russell King
---
 arch/arm/mm/init.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 98ad9c79ea0e..d538dc73fcb0 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -319,10 +319,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
 
 	early_init_fdt_scan_reserved_mem();
 
-	/*
-	 * reserve memory for DMA contigouos allocations,
-	 * must come from DMA area inside low memory
-	 */
+	/* reserve memory for DMA contiguous allocations */
 	dma_contiguous_reserve(arm_dma_limit);
 
 	arm_memblock_steal_permitted = false;
--
cgit v1.2.3

From c2273a185354fe9420fb342b1ca09a6fed857fb3 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Fri, 16 Jan 2015 18:01:43 +0100
Subject: ARM: 8288/1: dma-mapping: don't detach devices without an IOMMU during teardown

When tearing down the DMA ops for a device via of_dma_deconfigure, we
unconditionally detach the device from its IOMMU domain. For devices
that aren't actually behind an IOMMU, this produces a "Not attached"
warning message on the console.

This patch changes the teardown code so that we don't detach from the
IOMMU domain when there isn't an IOMMU dma mapping to start with.

Reported-by: Laurent Pinchart
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/dma-mapping.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 7864797609b3..f142ddd6c40a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2025,6 +2025,9 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
 {
 	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
 
+	if (!mapping)
+		return;
+
 	arm_iommu_detach_device(dev);
 	arm_iommu_release_mapping(mapping);
 }
--
cgit v1.2.3

From fba289054f24d2550f47a1413e1ccc24f4165560 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 28 Jan 2015 17:58:33 +0100
Subject: ARM: 8298/1: ARM_KERNMEM_PERMS only works with MMU enabled

The recently added ARM_KERNMEM_PERMS feature works by manipulating the
kernel page tables, which obviously requires an MMU. Trying to enable
this feature when the MMU is disabled results in a lot of compile
errors in mm/init.c, so let's add a Kconfig dependency to avoid that
case.

Signed-off-by: Arnd Bergmann
Signed-off-by: Russell King
---
 arch/arm/mm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 03823e784f63..c43c71455566 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -1012,6 +1012,7 @@ config ARCH_SUPPORTS_BIG_ENDIAN
 
 config ARM_KERNMEM_PERMS
 	bool "Restrict kernel memory permissions"
+	depends on MMU
 	help
 	  If this is set, kernel memory other than kernel text (and rodata)
 	  will be made non-executable. The tradeoff is that each region is
--
cgit v1.2.3

From 8e64806672466392acf19e14427d1c29df3e58b9 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 29 Jan 2015 16:41:46 +0100
Subject: ARM: 8299/1: mm: ensure local active ASID is marked as allocated on rollover

Commit e1a5848e3398 ("ARM: 7924/1: mm: don't bother with reserved
ttbr0 when running with LPAE") removed the use of the reserved TTBR0
value for LPAE systems, since the ASID is held in the TTBR and can be
updated atomically with the pgd of the next mm.

Unfortunately, this patch forgot to update flush_context, which
deliberately avoids marking the local active ASID as allocated, since
we used to switch via ASID zero and didn't need to allocate the ASID
of the previous mm.

The side-effect of this is that we can allocate the same ASID to the
next mm and, between flushing the local TLB and updating TTBR0, we can
perform speculative TLB fills for userspace nG mappings using the page
table of the previous mm.

The consequence of this is that the next mm can erroneously hit some
mappings of the previous mm. Note that this was made significantly
harder to hit by a391263cd84e ("ARM: 8203/1: mm: try to re-use old
ASID assignments following a rollover") but is still theoretically
possible.

This patch fixes the problem by removing the code from flush_context
that forces the allocated ASID to zero for the local CPU. Many thanks
to the Broadcom guys for tracking this one down.

Fixes: e1a5848e3398 ("ARM: 7924/1: mm: don't bother with reserved ttbr0 when running with LPAE")
Cc: # v3.14+
Reported-by: Raymond Ngun
Tested-by: Raymond Ngun
Reviewed-by: Gregory Fong
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/context.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 91892569710f..845769e41332 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -144,21 +144,17 @@ static void flush_context(unsigned int cpu)
 	/* Update the list of reserved ASIDs and the ASID bitmap. */
 	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 	for_each_possible_cpu(i) {
-		if (i == cpu) {
-			asid = 0;
-		} else {
-			asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
-			/*
-			 * If this CPU has already been through a
-			 * rollover, but hasn't run another task in
-			 * the meantime, we must preserve its reserved
-			 * ASID, as this is the only trace we have of
-			 * the process it is still running.
-			 */
-			if (asid == 0)
-				asid = per_cpu(reserved_asids, i);
-			__set_bit(asid & ~ASID_MASK, asid_map);
-		}
+		asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
+		/*
+		 * If this CPU has already been through a
+		 * rollover, but hasn't run another task in
+		 * the meantime, we must preserve its reserved
+		 * ASID, as this is the only trace we have of
+		 * the process it is still running.
+		 */
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, i);
+		__set_bit(asid & ~ASID_MASK, asid_map);
 		per_cpu(reserved_asids, i) = asid;
 	}
--
cgit v1.2.3

From 20e783e39e55c2615fb61d1b3d139ee9edcf6772 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 28 Jan 2015 17:54:38 +0100
Subject: ARM: 8296/1: cache-l2x0: clean up aurora cache handling

The aurora cache controller is the only remaining user of a couple of
functions in this file, which are completely unused when that
controller is disabled, leading to build warnings:

arch/arm/mm/cache-l2x0.c:167:13: warning: 'l2x0_cache_sync' defined but not used [-Wunused-function]
arch/arm/mm/cache-l2x0.c:184:13: warning: 'l2x0_flush_all' defined but not used [-Wunused-function]
arch/arm/mm/cache-l2x0.c:194:13: warning: 'l2x0_disable' defined but not used [-Wunused-function]

With the knowledge that the code is now aurora-specific, we can
simplify it noticeably:

- The pl310 errata workarounds are not needed on aurora and can be
  removed.
- As confirmed by Thomas Petazzoni from the data sheet, the
  cache_wait() macro is never needed.
- There is no need to hold the lock across the atomic cache sync.
- We can load l2x0_base into a local variable across operations.

There should be no functional change in this patch, but readability
and the generated object code improve, along with avoiding the
warnings.

(on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of
DMA traffic by reading data from a SD card)

Acked-by: Thomas Petazzoni
Tested-by: Thomas Petazzoni
Signed-off-by: Arnd Bergmann
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 111 ++++++++++++++++------------------------------
 1 file changed, 38 insertions(+), 73 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 01de13809454..404c598da27d 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -156,73 +156,6 @@ static void l2c_disable(void)
 	dsb(st);
 }
 
-#ifdef CONFIG_CACHE_PL310
-static inline void cache_wait(void __iomem *reg, unsigned long mask)
-{
-	/* cache operations by line are atomic on PL310 */
-}
-#else
-#define cache_wait	l2c_wait_mask
-#endif
-
-static inline void cache_sync(void)
-{
-	void __iomem *base = l2x0_base;
-
-	writel_relaxed(0, base + sync_reg_offset);
-	cache_wait(base + L2X0_CACHE_SYNC, 1);
-}
-
-#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
-static inline void debug_writel(unsigned long val)
-{
-	l2c_set_debug(l2x0_base, val);
-}
-#else
-/* Optimised out for non-errata case */
-static inline void debug_writel(unsigned long val)
-{
-}
-#endif
-
-static void l2x0_cache_sync(void)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	cache_sync();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void __l2x0_flush_all(void)
-{
-	debug_writel(0x03);
-	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
-	cache_sync();
-	debug_writel(0x00);
-}
-
-static void l2x0_flush_all(void)
-{
-	unsigned long flags;
-
-	/* clean all ways */
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	__l2x0_flush_all();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void l2x0_disable(void)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	__l2x0_flush_all();
-	l2c_write_sec(0, l2x0_base, L2X0_CTRL);
-	dsb(st);
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
 static void l2c_save(void __iomem *base)
 {
 	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
@@ -1349,14 +1282,15 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
 static void aurora_pa_range(unsigned long start, unsigned long end,
 			    unsigned long offset)
 {
+	void __iomem *base = l2x0_base;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
-	writel_relaxed(end, l2x0_base + offset);
+	writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
+	writel_relaxed(end, base + offset);
 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 
-	cache_sync();
+	writel_relaxed(0, base + AURORA_SYNC_REG);
 }
 
 static void aurora_inv_range(unsigned long start, unsigned long end)
@@ -1416,6 +1350,37 @@ static void aurora_flush_range(unsigned long start, unsigned long end)
 	}
 }
 
+static void aurora_flush_all(void)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	/* clean all ways */
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+	writel_relaxed(0, base + AURORA_SYNC_REG);
+}
+
+static void aurora_cache_sync(void)
+{
+	writel_relaxed(0, l2x0_base + AURORA_SYNC_REG);
+}
+
+static void aurora_disable(void)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	writel_relaxed(0, base + AURORA_SYNC_REG);
+	l2c_write_sec(0, base, L2X0_CTRL);
+	dsb(st);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
 static void aurora_save(void __iomem *base)
 {
 	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
@@ -1480,9 +1445,9 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
 		.inv_range = aurora_inv_range,
 		.clean_range = aurora_clean_range,
 		.flush_range = aurora_flush_range,
-		.flush_all = l2x0_flush_all,
-		.disable = l2x0_disable,
-		.sync = l2x0_cache_sync,
+		.flush_all = aurora_flush_all,
+		.disable = aurora_disable,
+		.sync = aurora_cache_sync,
 		.resume = l2c_resume,
 	},
 };
--
cgit v1.2.3

From 1d88967900b87f94435581dad4ae319686c6ce10 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 28 Jan 2015 17:55:31 +0100
Subject: ARM: 8297/1: cache-l2x0: optimize aurora range operations

The aurora_inv_range(), aurora_clean_range() and aurora_flush_range()
functions are highly redundant, both in source and in object code, and
they are harder to understand than necessary.

By moving the range loop into the aurora_pa_range() function, they
become trivial wrappers, and the object code starts looking like what
one would expect for an optimal implementation.

Further optimization may be possible by using the per-CPU "virtual"
registers to avoid the spinlocks in most cases.

(on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of
DMA traffic by reading data from a SD card)

Reviewed-by: Thomas Petazzoni
Tested-by: Thomas Petazzoni
Signed-off-by: Arnd Bergmann
Signed-off-by: Russell King
---
 arch/arm/mm/cache-l2x0.c | 68 ++++++++++++++++--------------------------------
 1 file changed, 22 insertions(+), 46 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 404c598da27d..5ea2d6d417f7 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1256,7 +1256,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
  * noninclusive, while the hardware cache range operations use
  * inclusive start and end addresses.
  */
-static unsigned long calc_range_end(unsigned long start, unsigned long end)
+static unsigned long aurora_range_end(unsigned long start, unsigned long end)
 {
 	/*
 	 * Limit the number of cache lines processed at once,
@@ -1275,26 +1275,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
 	return end;
 }
 
-/*
- * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
- * and range operations only do a TLB lookup on the start address.
- */
 static void aurora_pa_range(unsigned long start, unsigned long end,
-	unsigned long offset)
+			    unsigned long offset)
 {
 	void __iomem *base = l2x0_base;
+	unsigned long range_end;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
-	writel_relaxed(end, base + offset);
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-
-	writel_relaxed(0, base + AURORA_SYNC_REG);
-}
-
-static void aurora_inv_range(unsigned long start, unsigned long end)
-{
 	/*
 	 * round start and end adresses up to cache line size
 	 */
@@ -1302,15 +1289,24 @@ static void aurora_inv_range(unsigned long start, unsigned long end)
 	end = ALIGN(end, CACHE_LINE_SIZE);
 
 	/*
-	 * Invalidate all full cache lines between 'start' and 'end'.
+	 * perform operation on all full cache lines between 'start' and 'end'
 	 */
 	while (start < end) {
-		unsigned long range_end = calc_range_end(start, end);
-		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-				AURORA_INVAL_RANGE_REG);
+		range_end = aurora_range_end(start, end);
+
+		raw_spin_lock_irqsave(&l2x0_lock, flags);
+		writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
+		writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
+		raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+		writel_relaxed(0, base + AURORA_SYNC_REG);
 		start = range_end;
 	}
 }
+static void aurora_inv_range(unsigned long start, unsigned long end)
+{
+	aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+}
 
 static void aurora_clean_range(unsigned long start, unsigned long end)
 {
@@ -1318,36 +1314,16 @@ static void aurora_clean_range(unsigned long start, unsigned long end)
 	 * If L2 is forced to WT, the L2 will always be clean and we
 	 * don't need to do anything here.
 	 */
-	if (!l2_wt_override) {
-		start &= ~(CACHE_LINE_SIZE - 1);
-		end = ALIGN(end, CACHE_LINE_SIZE);
-		while (start != end) {
-			unsigned long range_end = calc_range_end(start, end);
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_CLEAN_RANGE_REG);
-			start = range_end;
-		}
-	}
+	if (!l2_wt_override)
+		aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
 }
 
 static void aurora_flush_range(unsigned long start, unsigned long end)
 {
-	start &= ~(CACHE_LINE_SIZE - 1);
-	end = ALIGN(end, CACHE_LINE_SIZE);
-	while (start != end) {
-		unsigned long range_end = calc_range_end(start, end);
-		/*
-		 * If L2 is forced to WT, the L2 will always be clean and we
-		 * just need to invalidate.
-		 */
-		if (l2_wt_override)
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_INVAL_RANGE_REG);
-		else
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_FLUSH_RANGE_REG);
-		start = range_end;
-	}
+	if (l2_wt_override)
+		aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+	else
+		aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
 }
 
 static void aurora_flush_all(void)
--
cgit v1.2.3