summaryrefslogtreecommitdiff
path: root/arch/arm64/kernel/smp.c
diff options
context:
space:
mode:
authorSuzuki K Poulose <suzuki.poulose@arm.com>2016-02-23 10:31:42 +0000
committerCatalin Marinas <catalin.marinas@arm.com>2016-02-25 10:32:23 +0000
commitbb9052744f4b7ae11d0061ac9492dd2949981b49 (patch)
tree1517029d99754433b22dd468bb5476514ef8f76a /arch/arm64/kernel/smp.c
parentfce6361fe9b0caeba0c05b7d72ceda406f8780df (diff)
arm64: Handle early CPU boot failures
A secondary CPU could fail to come online due to insufficient capabilities and could simply die or loop in the kernel. e.g, a CPU with no support for the selected kernel PAGE_SIZE loops in kernel with MMU turned off. or a hotplugged CPU which doesn't have one of the advertised system capability will die during the activation. There is no way to synchronise the status of the failing CPU back to the master. This patch solves the issue by adding a field to the secondary_data which can be updated by the failing CPU. If the secondary CPU fails even before turning the MMU on, it updates the status in a special variable reserved in the head.txt section to make sure that the update can be cache invalidated safely without possible sharing of cache write back granule. Here are the possible states : -1. CPU_MMU_OFF - Initial value set by the master CPU, this value indicates that the CPU could not turn the MMU on, hence the status could not be reliably updated in the secondary_data. Instead, the CPU has updated the status @ __early_cpu_boot_status. 0. CPU_BOOT_SUCCESS - CPU has booted successfully. 1. CPU_KILL_ME - CPU has invoked cpu_ops->die, indicating the master CPU to synchronise by issuing a cpu_ops->cpu_kill. 2. CPU_STUCK_IN_KERNEL - CPU couldn't invoke die(), instead is looping in the kernel. This information could be used by say, kexec to check if it is really safe to do a kexec reboot. 3. CPU_PANIC_KERNEL - CPU detected some serious issues which requires kernel to crash immediately. The secondary CPU cannot call panic() until it has initialised the GIC. This flag can be used to instruct the master to do so. Cc: Mark Rutland <mark.rutland@arm.com> Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com> [catalin.marinas@arm.com: conflict resolution] [catalin.marinas@arm.com: converted "status" from int to long] [catalin.marinas@arm.com: updated update_early_cpu_boot_status to use str_l] Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Diffstat (limited to 'arch/arm64/kernel/smp.c')
-rw-r--r--arch/arm64/kernel/smp.c45
1 files changed, 45 insertions, 0 deletions
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 8d09f597024d..d07b19654307 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -63,6 +63,8 @@
* where to place its SVC stack
*/
struct secondary_data secondary_data;
+/* Number of CPUs which aren't online, but looping in kernel text. */
+int cpus_stuck_in_kernel;
enum ipi_msg_type {
IPI_RESCHEDULE,
@@ -73,6 +75,16 @@ enum ipi_msg_type {
IPI_WAKEUP
};
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_kill(unsigned int cpu);
+#else
+static inline int op_cpu_kill(unsigned int cpu)
+{
+ return -ENOSYS;
+}
+#endif
+
+
/*
* Boot a secondary CPU, and assign it the specified idle task.
* This also gives us the initial stack to use for this CPU.
@@ -90,12 +102,14 @@ static DECLARE_COMPLETION(cpu_running);
int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
+ long status;
/*
* We need to tell the secondary core where to find its stack and the
* page tables.
*/
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+ update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
/*
@@ -119,6 +133,32 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
}
secondary_data.stack = NULL;
+ status = READ_ONCE(secondary_data.status);
+ if (ret && status) {
+
+ if (status == CPU_MMU_OFF)
+ status = READ_ONCE(__early_cpu_boot_status);
+
+ switch (status) {
+ default:
+ pr_err("CPU%u: failed in unknown state : 0x%lx\n",
+ cpu, status);
+ break;
+ case CPU_KILL_ME:
+ if (!op_cpu_kill(cpu)) {
+ pr_crit("CPU%u: died during early boot\n", cpu);
+ break;
+ }
+ /* Fall through */
+ pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
+ case CPU_STUCK_IN_KERNEL:
+ pr_crit("CPU%u: is stuck in kernel\n", cpu);
+ cpus_stuck_in_kernel++;
+ break;
+ case CPU_PANIC_KERNEL:
+ panic("CPU%u detected unsupported configuration\n", cpu);
+ }
+ }
return ret;
}
@@ -184,6 +224,9 @@ asmlinkage void secondary_start_kernel(void)
*/
pr_info("CPU%u: Booted secondary processor [%08x]\n",
cpu, read_cpuid_id());
+ update_cpu_boot_status(CPU_BOOT_SUCCESS);
+ /* Make sure the status update is visible before we complete */
+ smp_wmb();
set_cpu_online(cpu, true);
complete(&cpu_running);
@@ -326,10 +369,12 @@ void cpu_die_early(void)
set_cpu_present(cpu, 0);
#ifdef CONFIG_HOTPLUG_CPU
+ update_cpu_boot_status(CPU_KILL_ME);
/* Check if we can park ourselves */
if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
cpu_ops[cpu]->cpu_die(cpu);
#endif
+ update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
cpu_park_loop();
}