summaryrefslogtreecommitdiff
path: root/arch/arm64/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/mm')
-rw-r--r--arch/arm64/mm/fault.c48
-rw-r--r--arch/arm64/mm/init.c2
-rw-r--r--arch/arm64/mm/kasan_init.c2
-rw-r--r--arch/arm64/mm/mmu.c113
-rw-r--r--arch/arm64/mm/numa.c2
-rw-r--r--arch/arm64/mm/proc.S49
6 files changed, 142 insertions, 74 deletions
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index ad49ae8f3967..f3d3f2e97add 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -79,18 +79,33 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
#endif
/*
- * Dump out the page tables associated with 'addr' in mm 'mm'.
+ * Dump out the page tables associated with 'addr' in the currently active mm.
*/
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(unsigned long addr)
{
+ struct mm_struct *mm;
pgd_t *pgd;
- if (!mm)
+ if (addr < TASK_SIZE) {
+ /* TTBR0 */
+ mm = current->active_mm;
+ if (mm == &init_mm) {
+ pr_alert("[%016lx] user address but active_mm is swapper\n",
+ addr);
+ return;
+ }
+ } else if (addr >= VA_START) {
+ /* TTBR1 */
mm = &init_mm;
+ } else {
+ pr_alert("[%016lx] address between user and kernel address ranges\n",
+ addr);
+ return;
+ }
pr_alert("pgd = %p\n", mm->pgd);
pgd = pgd_offset(mm, addr);
- pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
+ pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
do {
pud_t *pud;
@@ -176,8 +191,8 @@ static bool is_el1_instruction_abort(unsigned int esr)
/*
* The kernel tried to access some page that wasn't present.
*/
-static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int esr, struct pt_regs *regs)
+static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs)
{
/*
* Are we prepared to handle this kernel fault?
@@ -194,7 +209,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
(addr < PAGE_SIZE) ? "NULL pointer dereference" :
"paging request", addr);
- show_pte(mm, addr);
+ show_pte(addr);
die("Oops", regs, esr);
bust_spinlocks(0);
do_exit(SIGKILL);
@@ -216,7 +231,6 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
tsk->comm, task_pid_nr(tsk), inf->name, sig,
addr, esr);
- show_pte(tsk->mm, addr);
show_regs(regs);
}
@@ -232,7 +246,6 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->active_mm;
const struct fault_info *inf;
/*
@@ -243,7 +256,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
inf = esr_to_fault_info(esr);
__do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs);
} else
- __do_kernel_fault(mm, addr, esr, regs);
+ __do_kernel_fault(addr, esr, regs);
}
#define VM_FAULT_BADMAP 0x010000
@@ -306,7 +319,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
struct task_struct *tsk;
struct mm_struct *mm;
int fault, sig, code;
- unsigned long vm_flags = VM_READ | VM_WRITE;
+ unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (notify_page_fault(regs, esr))
@@ -454,7 +467,7 @@ retry:
return 0;
no_context:
- __do_kernel_fault(mm, addr, esr, regs);
+ __do_kernel_fault(addr, esr, regs);
return 0;
}
@@ -673,11 +686,12 @@ void __init hook_debug_fault_code(int nr,
debug_fault_info[nr].name = name;
}
-asmlinkage int __exception do_debug_exception(unsigned long addr,
+asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint,
unsigned int esr,
struct pt_regs *regs)
{
const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
+ unsigned long pc = instruction_pointer(regs);
struct siginfo info;
int rv;
@@ -688,19 +702,19 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
if (interrupts_enabled(regs))
trace_hardirqs_off();
- if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE)
+ if (user_mode(regs) && pc > TASK_SIZE)
arm64_apply_bp_hardening();
- if (!inf->fn(addr, esr, regs)) {
+ if (!inf->fn(addr_if_watchpoint, esr, regs)) {
rv = 1;
} else {
pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
- inf->name, esr, addr);
+ inf->name, esr, pc);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
- info.si_addr = (void __user *)addr;
+ info.si_addr = (void __user *)pc;
arm64_notify_die("", regs, &info, 0);
rv = 0;
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index fa6b2fad7a3d..5d3df68272f5 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -272,7 +272,7 @@ void __init arm64_memblock_init(void)
* memory spans, randomize the linear region as well.
*/
if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
- range = range / ARM64_MEMSTART_ALIGN + 1;
+ range /= ARM64_MEMSTART_ALIGN;
memstart_addr -= ARM64_MEMSTART_ALIGN *
((range * memstart_offset_seed) >> 16);
}
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 757009daa9ed..ff43da269fe8 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -153,7 +153,7 @@ void __init kasan_init(void)
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
- pfn_to_nid(virt_to_pfn(_text)));
+ pfn_to_nid(virt_to_pfn(lm_alias(_text))));
/*
* vmemmap_populate() has populated the shadow region that covers the
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0a56898f8410..60be5bc0984a 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -28,8 +28,6 @@
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/stop_machine.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@@ -95,6 +93,17 @@ static phys_addr_t __init early_pgtable_alloc(void)
return phys;
}
+static bool pgattr_change_is_safe(u64 old, u64 new)
+{
+ /*
+ * The following mapping attributes may be updated in live
+ * kernel mappings without the need for break-before-make.
+ */
+ static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE;
+
+ return old == 0 || new == 0 || ((old ^ new) & ~mask) == 0;
+}
+
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
pgprot_t prot,
@@ -115,8 +124,17 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
pte = pte_set_fixmap_offset(pmd, addr);
do {
+ pte_t old_pte = *pte;
+
set_pte(pte, pfn_pte(pfn, prot));
pfn++;
+
+ /*
+ * After the PTE entry has been populated once, we
+ * only allow updates to the permission attributes.
+ */
+ BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
+
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_clear_fixmap();
@@ -146,27 +164,27 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
pmd = pmd_set_fixmap_offset(pud, addr);
do {
+ pmd_t old_pmd = *pmd;
+
next = pmd_addr_end(addr, end);
+
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
allow_block_mappings) {
- pmd_t old_pmd =*pmd;
pmd_set_huge(pmd, phys, prot);
+
/*
- * Check for previous table entries created during
- * boot (__create_page_tables) and flush them.
+ * After the PMD entry has been populated once, we
+ * only allow updates to the permission attributes.
*/
- if (!pmd_none(old_pmd)) {
- flush_tlb_all();
- if (pmd_table(old_pmd)) {
- phys_addr_t table = pmd_page_paddr(old_pmd);
- if (!WARN_ON_ONCE(slab_is_available()))
- memblock_free(table, PAGE_SIZE);
- }
- }
+ BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
+ pmd_val(*pmd)));
} else {
alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
prot, pgtable_alloc);
+
+ BUG_ON(pmd_val(old_pmd) != 0 &&
+ pmd_val(old_pmd) != pmd_val(*pmd));
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
@@ -204,33 +222,28 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
pud = pud_set_fixmap_offset(pgd, addr);
do {
+ pud_t old_pud = *pud;
+
next = pud_addr_end(addr, end);
/*
* For 4K granule only, attempt to put down a 1GB block
*/
if (use_1G_block(addr, next, phys) && allow_block_mappings) {
- pud_t old_pud = *pud;
pud_set_huge(pud, phys, prot);
/*
- * If we have an old value for a pud, it will
- * be pointing to a pmd table that we no longer
- * need (from swapper_pg_dir).
- *
- * Look up the old pmd table and free it.
+ * After the PUD entry has been populated once, we
+ * only allow updates to the permission attributes.
*/
- if (!pud_none(old_pud)) {
- flush_tlb_all();
- if (pud_table(old_pud)) {
- phys_addr_t table = pud_page_paddr(old_pud);
- if (!WARN_ON_ONCE(slab_is_available()))
- memblock_free(table, PAGE_SIZE);
- }
- }
+ BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
+ pud_val(*pud)));
} else {
alloc_init_pmd(pud, addr, next, phys, prot,
pgtable_alloc, allow_block_mappings);
+
+ BUG_ON(pud_val(old_pud) != 0 &&
+ pud_val(old_pud) != pud_val(*pud));
}
phys += next - addr;
} while (pud++, addr = next, addr != end);
@@ -396,6 +409,9 @@ void mark_rodata_ro(void)
section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
+
+ /* flush the TLBs after updating live kernel mappings */
+ flush_tlb_all();
}
static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
@@ -479,8 +495,8 @@ static void __init map_kernel(pgd_t *pgd)
* entry instead.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
- set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
- __pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+ pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+ lm_alias(bm_pmd));
pud_clear_fixmap();
} else {
BUG();
@@ -595,7 +611,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
if (!p)
return -ENOMEM;
- set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
+ pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
} else
vmemmap_verify((pte_t *)pmd, node, addr, next);
} while (addr = next, addr != end);
@@ -765,26 +781,49 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
int __init arch_ioremap_pud_supported(void)
{
- /* only 4k granule supports level 1 block mappings */
- return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
+ /*
+ * Only 4k granule supports level 1 block mappings.
+ * SW table walks can't handle removal of intermediate entries.
+ */
+ return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
+ !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS);
}
int __init arch_ioremap_pmd_supported(void)
{
- return 1;
+ /* See arch_ioremap_pud_supported() */
+ return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS);
}
-int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
+ pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
+ pgprot_val(mk_sect_prot(prot)));
+ pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);
+
+ /* Only allow permission changes for now */
+ if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
+ pud_val(new_pud)))
+ return 0;
+
BUG_ON(phys & ~PUD_MASK);
- set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+ set_pud(pudp, new_pud);
return 1;
}
-int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
+ pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
+ pgprot_val(mk_sect_prot(prot)));
+ pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);
+
+ /* Only allow permission changes for now */
+ if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
+ pmd_val(new_pmd)))
+ return 0;
+
BUG_ON(phys & ~PMD_MASK);
- set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+ set_pmd(pmdp, new_pmd);
return 1;
}
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 4b32168cf91a..b1e42bad69ac 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -424,7 +424,7 @@ static int __init dummy_numa_init(void)
if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */
pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n",
- 0LLU, PFN_PHYS(max_pfn) - 1);
+ memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1);
for_each_memblock(memory, mblk) {
ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 18d96d349a8b..3b95e3126eeb 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -64,17 +64,18 @@ ENTRY(cpu_do_suspend)
mrs x2, tpidr_el0
mrs x3, tpidrro_el0
mrs x4, contextidr_el1
- mrs x5, cpacr_el1
- mrs x6, tcr_el1
- mrs x7, vbar_el1
- mrs x8, mdscr_el1
- mrs x9, oslsr_el1
- mrs x10, sctlr_el1
+ mrs x5, osdlr_el1
+ mrs x6, cpacr_el1
+ mrs x7, tcr_el1
+ mrs x8, vbar_el1
+ mrs x9, mdscr_el1
+ mrs x10, oslsr_el1
+ mrs x11, sctlr_el1
stp x2, x3, [x0]
- stp x4, xzr, [x0, #16]
- stp x5, x6, [x0, #32]
- stp x7, x8, [x0, #48]
- stp x9, x10, [x0, #64]
+ stp x4, x5, [x0, #16]
+ stp x6, x7, [x0, #32]
+ stp x8, x9, [x0, #48]
+ stp x10, x11, [x0, #64]
ret
ENDPROC(cpu_do_suspend)
@@ -96,8 +97,8 @@ ENTRY(cpu_do_resume)
msr cpacr_el1, x6
/* Don't change t0sz here, mask those bits when restoring */
- mrs x5, tcr_el1
- bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+ mrs x7, tcr_el1
+ bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
msr tcr_el1, x8
msr vbar_el1, x9
@@ -115,6 +116,7 @@ ENTRY(cpu_do_resume)
/*
* Restore oslsr_el1 by writing oslar_el1
*/
+ msr osdlr_el1, x5
ubfx x11, x11, #1, #1
msr oslar_el1, x11
reset_pmuserenr_el0 x0 // Disable PMU access from EL0
@@ -181,7 +183,8 @@ ENDPROC(idmap_cpu_replace_ttbr1)
dc cvac, cur_\()\type\()p // Ensure any existing dirty
dmb sy // lines are written back before
ldr \type, [cur_\()\type\()p] // loading the entry
- tbz \type, #0, next_\()\type // Skip invalid entries
+ tbz \type, #0, skip_\()\type // Skip invalid and
+ tbnz \type, #11, skip_\()\type // non-global entries
.endm
.macro __idmap_kpti_put_pgtable_ent_ng, type
@@ -241,8 +244,9 @@ ENTRY(idmap_kpti_install_ng_mappings)
add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
do_pgd: __idmap_kpti_get_pgtable_ent pgd
tbnz pgd, #1, walk_puds
- __idmap_kpti_put_pgtable_ent_ng pgd
next_pgd:
+ __idmap_kpti_put_pgtable_ent_ng pgd
+skip_pgd:
add cur_pgdp, cur_pgdp, #8
cmp cur_pgdp, end_pgdp
b.ne do_pgd
@@ -259,6 +263,15 @@ next_pgd:
msr sctlr_el1, x18
isb
+ /*
+ * Invalidate the local I-cache so that any instructions fetched
+ * speculatively from the PoC are discarded, since they may have
+ * been dynamically patched at the PoU.
+ */
+ ic iallu
+ dsb nsh
+ isb
+
/* Set the flag to zero to indicate that we're all done */
str wzr, [flag_ptr]
ret
@@ -270,8 +283,9 @@ walk_puds:
add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
do_pud: __idmap_kpti_get_pgtable_ent pud
tbnz pud, #1, walk_pmds
- __idmap_kpti_put_pgtable_ent_ng pud
next_pud:
+ __idmap_kpti_put_pgtable_ent_ng pud
+skip_pud:
add cur_pudp, cur_pudp, 8
cmp cur_pudp, end_pudp
b.ne do_pud
@@ -290,8 +304,9 @@ walk_pmds:
add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
do_pmd: __idmap_kpti_get_pgtable_ent pmd
tbnz pmd, #1, walk_ptes
- __idmap_kpti_put_pgtable_ent_ng pmd
next_pmd:
+ __idmap_kpti_put_pgtable_ent_ng pmd
+skip_pmd:
add cur_pmdp, cur_pmdp, #8
cmp cur_pmdp, end_pmdp
b.ne do_pmd
@@ -309,7 +324,7 @@ walk_ptes:
add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
do_pte: __idmap_kpti_get_pgtable_ent pte
__idmap_kpti_put_pgtable_ent_ng pte
-next_pte:
+skip_pte:
add cur_ptep, cur_ptep, #8
cmp cur_ptep, end_ptep
b.ne do_pte