From 61165d7a035f6571c7576e7f51e7230157724c8d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 13 May 2008 14:26:57 +0100 Subject: x86: fix app crashes after SMP resume After resume on a 2cpu laptop, kernel builds collapse with a sed hang, sh or make segfault (often on 20295564), real-time signal to cc1 etc. Several hurdles to jump, but a manually-assisted bisect led to -rc1's d2bcbad5f3ad38a1c09861bca7e252dde7bb8259 x86: do not zap_low_mappings in __smp_prepare_cpus. Though the low mappings were removed at bootup, they were left behind (with Global flags helping to keep them in TLB) after resume or cpu online, causing the crashes seen. Reinstate zap_low_mappings (with local __flush_tlb_all) for each cpu_up on x86_32. This used to be serialized by smp_commenced_mask: that's now gone, but a low_mappings flag will do. No need for native_smp_cpus_done to repeat the zap: let mem_init zap BSP's low mappings just like on UP. (In passing, fix error code from native_cpu_up: do_boot_cpu returns a variety of diagnostic values, Dprintk what it says but convert to -EIO. And save_pg_dir separately before zap_low_mappings: doesn't matter now, but zapping twice in succession wiped out resume's swsusp_pg_dir.) That worked well on the duo and one quad, but wouldn't boot 3rd or 4th cpu on P4 Xeon, oopsing just after unlock_ipi_call_lock. The TLB flush IPI now being sent reveals a long-standing bug: the booting cpu has its APIC readied in smp_callin at the top of start_secondary, but isn't put into the cpu_online_map until just before that unlock_ipi_call_lock. So native_smp_call_function_mask to online cpus would send_IPI_allbutself, including the cpu just coming up, though it has been excluded from the count to wait for: by the time it handles the IPI, the call data on native_smp_call_function_mask's stack may well have been overwritten. So fall back to send_IPI_mask while cpu_online_map does not match cpu_callout_map: perhaps there's a better APICological fix to be made at the start_secondary end, but I wouldn't know that. Signed-off-by: Hugh Dickins Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6b087ab6cd8f..38988491c622 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -86,6 +86,7 @@ void *x86_bios_cpu_apicid_early_ptr; #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; +static int low_mappings; #endif /* State of each CPU */ @@ -326,6 +327,12 @@ static void __cpuinit start_secondary(void *unused) enable_8259A_irq(0); } +#ifdef CONFIG_X86_32 + while (low_mappings) + cpu_relax(); + __flush_tlb_all(); +#endif + /* This must be done before setting cpu_online_map */ set_cpu_sibling_map(raw_smp_processor_id()); wmb(); @@ -1040,14 +1047,20 @@ int __cpuinit native_cpu_up(unsigned int cpu) #ifdef CONFIG_X86_32 /* init low mem mapping */ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); + min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); flush_tlb_all(); -#endif + low_mappings = 1; err = do_boot_cpu(apicid, cpu); - if (err < 0) { + + zap_low_mappings(); + low_mappings = 0; +#else + err = do_boot_cpu(apicid, cpu); +#endif + if (err) { Dprintk("do_boot_cpu failed %d\n", err); - return err; + return -EIO; } /* @@ -1259,9 +1272,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); #endif check_nmi_watchdog(); -#ifdef CONFIG_X86_32 - zap_low_mappings(); -#endif } #ifdef CONFIG_HOTPLUG_CPU -- cgit v1.2.3 From deef325086c3897393b8f7d6bccd03405244fe18 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 15:44:38 +0200 Subject: x86: disable preemption in native_smp_prepare_cpus Priit Laes reported the following warning: Call Trace: [] warn_on_slowpath+0x51/0x63 [] sys_ioctl+0x2d/0x5d [] _spin_lock+0xe/0x24 [] task_rq_lock+0x3d/0x73 [] set_cpu_sibling_map+0x336/0x350 [] read_apic_id+0x30/0x62 [] verify_local_APIC+0x90/0x138 [] native_smp_prepare_cpus+0x1f9/0x305 [] kernel_init+0x59/0x2d9 [] _spin_unlock_irq+0x11/0x2b [] child_rip+0xa/0x12 [] kernel_init+0x0/0x2d9 [] child_rip+0x0/0x12 fix this by generally disabling preemption in native_smp_prepare_cpus(). Reported-and-bisected-by: Priit Laes Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/smpboot.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/smpboot.c') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 38988491c622..56078d61c793 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1190,6 +1190,7 @@ static void __init smp_cpu_index_default(void) */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { + preempt_disable(); nmi_watchdog_default(); smp_cpu_index_default(); current_cpu_data = boot_cpu_data; @@ -1206,7 +1207,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); - return; + goto out; } preempt_disable(); @@ -1246,6 +1247,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) printk(KERN_INFO "CPU%d: ", 0); print_cpu_info(&cpu_data(0)); setup_boot_clock(); +out: + preempt_enable(); } /* * Early setup to make printk work. -- cgit v1.2.3