From c643f5b6a0a3d8f17c1fce22eae43c880acb5bad Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 2 Aug 2019 14:29:24 -0500 Subject: powerpc/rtas: use device model APIs and serialization during LPM [ Upstream commit a6717c01ddc259f6f73364779df058e2c67309f8 ] The LPAR migration implementation and userspace-initiated cpu hotplug can interleave their executions like so: 1. Set cpu 7 offline via sysfs. 2. Begin a partition migration, whose implementation requires the OS to ensure all present cpus are online; cpu 7 is onlined: rtas_ibm_suspend_me -> rtas_online_cpus_mask -> cpu_up This sets cpu 7 online in all respects except for the cpu's corresponding struct device; dev->offline remains true. 3. Set cpu 7 online via sysfs. _cpu_up() determines that cpu 7 is already online and returns success. The driver core (device_online) sets dev->offline = false. 4. The migration completes and restores cpu 7 to offline state: rtas_ibm_suspend_me -> rtas_offline_cpus_mask -> cpu_down This leaves cpu7 in a state where the driver core considers the cpu device online, but in all other respects it is offline and unused. Attempts to online the cpu via sysfs appear to succeed but the driver core actually does not pass the request to the lower-level cpuhp support code. This makes the cpu unusable until the cpu device is manually set offline and then online again via sysfs. Instead of directly calling cpu_up/cpu_down, the migration code should use the higher-level device core APIs to maintain consistent state and serialize operations. Fixes: 120496ac2d2d ("powerpc: Bring all threads online prior to migration/hibernation") Signed-off-by: Nathan Lynch Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190802192926.19277-2-nathanl@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/kernel/rtas.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel/rtas.c') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 1643e9e53655..141d192c6953 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -874,15 +874,17 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, return 0; for_each_cpu(cpu, cpus) { + struct device *dev = get_cpu_device(cpu); + switch (state) { case DOWN: - cpuret = cpu_down(cpu); + cpuret = device_offline(dev); break; case UP: - cpuret = cpu_up(cpu); + cpuret = device_online(dev); break; } - if (cpuret) { + if (cpuret < 0) { pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", __func__, ((state == UP) ? "up" : "down"), @@ -971,6 +973,8 @@ int rtas_ibm_suspend_me(u64 handle) data.token = rtas_token("ibm,suspend-me"); data.complete = &done; + lock_device_hotplug(); + /* All present CPUs must be online */ cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask); cpuret = rtas_online_cpus_mask(offline_mask); @@ -1002,6 +1006,7 @@ int rtas_ibm_suspend_me(u64 handle) __func__); out: + unlock_device_hotplug(); free_cpumask_var(offline_mask); return atomic_read(&data.error); } -- cgit v1.2.3 From 2c4c8ad782e4215b6f374535b4cac085e6228aff Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 17 Sep 2018 14:14:02 -0500 Subject: powerpc/pseries: Disable CPU hotplug across migrations [ Upstream commit 85a88cabad57d26d826dd94ea34d3a785824d802 ] When performing partition migrations all present CPUs must be online as all present CPUs must make the H_JOIN call as part of the migration process. Once all present CPUs make the H_JOIN call, one CPU is returned to make the rtas call to perform the migration to the destination system. During testing of migration and changing the SMT state we have found instances where CPUs are offlined, as part of the SMT state change, before they make the H_JOIN call. This results in a hung system where every CPU is either in H_JOIN or offline. To prevent this this patch disables CPU hotplug during the migration process. Signed-off-by: Nathan Fontenot Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/rtas.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel/rtas.c') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 141d192c6953..a01f83ba739e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -984,6 +984,7 @@ int rtas_ibm_suspend_me(u64 handle) goto out; } + cpu_hotplug_disable(); stop_topology_update(); /* Call function on all CPUs. One of us will make the @@ -998,6 +999,7 @@ int rtas_ibm_suspend_me(u64 handle) printk(KERN_ERR "Error doing global join\n"); start_topology_update(); + cpu_hotplug_enable(); /* Take down CPUs not online prior to suspend */ cpuret = rtas_offline_cpus_mask(offline_mask); -- cgit v1.2.3