summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-17 11:28:17 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-17 11:28:17 -0700
commitd109c4bb4513b8a63d56117f683901492093bfa9 (patch)
treeb3ab875fe22db852e7a04039e483c0e34417ac7a
parent42dc2a3048247109b0a5ee6345226cbd3e4f6410 (diff)
parentdf057cc7b4fa59e9b55f07ffdb6c62bf02e99a00 (diff)
Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 fixes from Will Deacon: "This addresses some problems with filesystem writeback due to the recently merged hardware DBM patches, which caused us to treat some read-only pages as dirty. There are also some other, less significant fixes that are described in the summary below: A mixture of fixes for regressions introduced during the merge window, some longer standing problems that we spotted and a couple of hardware errata. The main changes are: - Fix fallout from the h/w DBM patches, causing filesystem writeback issues on both v8 and v8.1 CPUs - Workaround for Cortex-A53 erratum #843419 in the module loader - Fix for long-standing issue with compat big-endian signal handlers using the saved floating point state" * tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: arm64: errata: add module build workaround for erratum #843419 arm64: compat: fix vfp save/restore across signal handlers in big-endian arm64: cpu hotplug: ensure we mask out CPU_TASKS_FROZEN in notifiers arm64: head.S: initialise mdcr_el2 in el2_setup arm64: enable generic idle loop arm64: pgtable: use a single bit for PTE_WRITE regardless of DBM arm64: Fix pte_modify() to preserve the hardware dirty information arm64: Fix the pte_hw_dirty() check when AF/DBM is enabled arm64: dma-mapping: check whether cma area is initialized or not
-rw-r--r--arch/arm64/Kconfig17
-rw-r--r--arch/arm64/Makefile4
-rw-r--r--arch/arm64/include/asm/pgtable.h12
-rw-r--r--arch/arm64/kernel/debug-monitors.c2
-rw-r--r--arch/arm64/kernel/head.S5
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c2
-rw-r--r--arch/arm64/kernel/module.c2
-rw-r--r--arch/arm64/kernel/signal32.c47
-rw-r--r--arch/arm64/mm/dma-mapping.c2
9 files changed, 71 insertions, 22 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7d95663c0160..07d1811aa03f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -32,6 +32,7 @@ config ARM64
select GENERIC_CLOCKEVENTS_BROADCAST
select GENERIC_CPU_AUTOPROBE
select GENERIC_EARLY_IOREMAP
+ select GENERIC_IDLE_POLL_SETUP
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
@@ -331,6 +332,22 @@ config ARM64_ERRATUM_845719
If unsure, say Y.
+config ARM64_ERRATUM_843419
+ bool "Cortex-A53: 843419: A load or store might access an incorrect address"
+ depends on MODULES
+ default y
+ help
+ This option builds kernel modules using the large memory model in
+ order to avoid the use of the ADRP instruction, which can cause
+ a subsequent memory access to use an incorrect address on Cortex-A53
+ parts up to r0p4.
+
+ Note that the kernel itself must be linked with a version of ld
+ which fixes potentially affected ADRP instructions through the
+ use of veneers.
+
+ If unsure, say Y.
+
endmenu
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 15ff5b4156fd..f9914d7c1bb0 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -41,6 +41,10 @@ endif
CHECKFLAGS += -D__aarch64__
+ifeq ($(CONFIG_ARM64_ERRATUM_843419), y)
+CFLAGS_MODULE += -mcmodel=large
+endif
+
# Default value
head-y := arch/arm64/kernel/head.o
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6900b2d95371..b0329be95cb1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -26,13 +26,9 @@
* Software defined PTE bits definition.
*/
#define PTE_VALID (_AT(pteval_t, 1) << 0)
+#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
-#ifdef CONFIG_ARM64_HW_AFDBM
-#define PTE_WRITE (PTE_DBM) /* same as DBM */
-#else
-#define PTE_WRITE (_AT(pteval_t, 1) << 57)
-#endif
#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
/*
@@ -146,7 +142,7 @@ extern struct page *empty_zero_page;
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#ifdef CONFIG_ARM64_HW_AFDBM
-#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY))
+#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
#else
#define pte_hw_dirty(pte) (0)
#endif
@@ -238,7 +234,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
* When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via
* the page fault mechanism. Checking the dirty status of a pte becomes:
*
- * PTE_DIRTY || !PTE_RDONLY
+ * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
@@ -503,7 +499,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK;
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
- newprot |= PTE_DIRTY;
+ pte = pte_mkdirty(pte);
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
return pte;
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 9b3b62ac9c24..cebf78661a55 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -134,7 +134,7 @@ static int os_lock_notify(struct notifier_block *self,
unsigned long action, void *data)
{
int cpu = (unsigned long)data;
- if (action == CPU_ONLINE)
+ if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
smp_call_function_single(cpu, clear_os_lock, NULL, 1);
return NOTIFY_OK;
}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a055be6125cf..90d09eddd5b2 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -523,6 +523,11 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
msr hstr_el2, xzr // Disable CP15 traps to EL2
#endif
+ /* EL2 debug */
+ mrs x0, pmcr_el0 // Disable debug access traps
+ ubfx x0, x0, #11, #5 // to EL2 and allow access to
+ msr mdcr_el2, x0 // all PMU counters from EL1
+
/* Stage-2 translation */
msr vttbr_el2, xzr
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index c97040ecf838..bba85c8f8037 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -872,7 +872,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self,
void *hcpu)
{
int cpu = (long)hcpu;
- if (action == CPU_ONLINE)
+ if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1);
return NOTIFY_OK;
}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 67bf4107f6ef..876eb8df50bf 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
AARCH64_INSN_IMM_ADR);
break;
+#ifndef CONFIG_ARM64_ERRATUM_843419
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
case R_AARCH64_ADR_PREL_PG_HI21:
ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21,
AARCH64_INSN_IMM_ADR);
break;
+#endif
case R_AARCH64_ADD_ABS_LO12_NC:
case R_AARCH64_LDST8_ABS_LO12_NC:
overflow_check = false;
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 948f0ad2de23..71ef6dc89ae5 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -212,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
/*
* VFP save/restore code.
+ *
+ * We have to be careful with endianness, since the fpsimd context-switch
+ * code operates on 128-bit (Q) register values whereas the compat ABI
+ * uses an array of 64-bit (D) registers. Consequently, we need to swap
+ * the two halves of each Q register when running on a big-endian CPU.
*/
+union __fpsimd_vreg {
+ __uint128_t raw;
+ struct {
+#ifdef __AARCH64EB__
+ u64 hi;
+ u64 lo;
+#else
+ u64 lo;
+ u64 hi;
+#endif
+ };
+};
+
static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
{
struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
compat_ulong_t magic = VFP_MAGIC;
compat_ulong_t size = VFP_STORAGE_SIZE;
compat_ulong_t fpscr, fpexc;
- int err = 0;
+ int i, err = 0;
/*
* Save the hardware registers to the fpsimd_state structure.
@@ -235,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
/*
* Now copy the FP registers. Since the registers are packed,
* we can copy the prefix we want (V0-V15) as it is.
- * FIXME: Won't work if big endian.
*/
- err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs,
- sizeof(frame->ufp.fpregs));
+ for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) {
+ union __fpsimd_vreg vreg = {
+ .raw = fpsimd->vregs[i >> 1],
+ };
+
+ __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err);
+ __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err);
+ }
/* Create an AArch32 fpscr from the fpsr and the fpcr. */
fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) |
@@ -263,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
compat_ulong_t magic = VFP_MAGIC;
compat_ulong_t size = VFP_STORAGE_SIZE;
compat_ulong_t fpscr;
- int err = 0;
+ int i, err = 0;
__get_user_error(magic, &frame->magic, err);
__get_user_error(size, &frame->size, err);
@@ -273,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE)
return -EINVAL;
- /*
- * Copy the FP registers into the start of the fpsimd_state.
- * FIXME: Won't work if big endian.
- */
- err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs,
- sizeof(frame->ufp.fpregs));
+ /* Copy the FP registers into the start of the fpsimd_state. */
+ for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) {
+ union __fpsimd_vreg vreg;
+
+ __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err);
+ __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err);
+ fpsimd.vregs[i >> 1] = vreg.raw;
+ }
/* Extract the fpsr and the fpcr from the fpscr */
__get_user_error(fpscr, &frame->ufp.fpscr, err);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 0bcc4bc94b4a..99224dcebdc5 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask <= DMA_BIT_MASK(32))
flags |= GFP_DMA;
- if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) {
+ if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) {
struct page *page;
void *addr;