From cb83b629bae0327cf9f44f096adc38d150ceb913 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Apr 2012 15:49:36 +0200 Subject: sched/numa: Rewrite the CONFIG_NUMA sched domain support The current code groups up to 16 nodes in a level and then puts an ALLNODES domain spanning the entire tree on top of that. This doesn't reflect the numa topology and esp for the smaller not-fully-connected machines out there today this might make a difference. Therefore, build a proper numa topology based on node_distance(). Since there's no fixed numa layers anymore, the static SD_NODE_INIT and SD_ALLNODES_INIT aren't usable anymore, the new code tries to construct something similar and scales some values either on the number of cpus in the domain and/or the node_distance() ratio. Signed-off-by: Peter Zijlstra Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: David Howells Cc: "David S. Miller" Cc: Fenghua Yu Cc: "H. Peter Anvin" Cc: Ivan Kokshaysky Cc: linux-alpha@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-sh@vger.kernel.org Cc: Matt Turner Cc: Paul Mackerras Cc: Paul Mundt Cc: Ralf Baechle Cc: Richard Henderson Cc: sparclinux@vger.kernel.org Cc: Tony Luck Cc: x86@kernel.org Cc: Dimitri Sivanich Cc: Greg Pearson Cc: KAMEZAWA Hiroyuki Cc: bob.picco@oracle.com Cc: chris.mason@oracle.com Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/n/tip-r74n3n8hhuc2ynbrnp3vt954@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/topology.h | 37 ------------------------------------- 1 file changed, 37 deletions(-) (limited to 'include/linux/topology.h') diff --git a/include/linux/topology.h b/include/linux/topology.h index e26db031303b..4f59bf36f0af 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -70,7 +70,6 @@ int arch_update_cpu_topology(void); * Below are the 3 major initializers used in building 
sched_domains: * SD_SIBLING_INIT, for SMT domains * SD_CPU_INIT, for SMP domains - * SD_NODE_INIT, for NUMA domains * * Any architecture that cares to do any tuning to these values should do so * by defining their own arch-specific initializer in include/asm/topology.h. @@ -176,48 +175,12 @@ int arch_update_cpu_topology(void); } #endif -/* sched_domains SD_ALLNODES_INIT for NUMA machines */ -#define SD_ALLNODES_INIT (struct sched_domain) { \ - .min_interval = 64, \ - .max_interval = 64*num_online_cpus(), \ - .busy_factor = 128, \ - .imbalance_pct = 133, \ - .cache_nice_tries = 1, \ - .busy_idx = 3, \ - .idle_idx = 3, \ - .flags = 1*SD_LOAD_BALANCE \ - | 1*SD_BALANCE_NEWIDLE \ - | 0*SD_BALANCE_EXEC \ - | 0*SD_BALANCE_FORK \ - | 0*SD_BALANCE_WAKE \ - | 0*SD_WAKE_AFFINE \ - | 0*SD_SHARE_CPUPOWER \ - | 0*SD_POWERSAVINGS_BALANCE \ - | 0*SD_SHARE_PKG_RESOURCES \ - | 1*SD_SERIALIZE \ - | 0*SD_PREFER_SIBLING \ - , \ - .last_balance = jiffies, \ - .balance_interval = 64, \ -} - -#ifndef SD_NODES_PER_DOMAIN -#define SD_NODES_PER_DOMAIN 16 -#endif - #ifdef CONFIG_SCHED_BOOK #ifndef SD_BOOK_INIT #error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! #endif #endif /* CONFIG_SCHED_BOOK */ -#ifdef CONFIG_NUMA -#ifndef SD_NODE_INIT -#error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! -#endif - -#endif /* CONFIG_NUMA */ - #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DECLARE_PER_CPU(int, numa_node); -- cgit v1.2.3 From 8e7fbcbc22c12414bcc9dfdd683637f58fb32759 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Jan 2012 11:28:35 +0100 Subject: sched: Remove stale power aware scheduling remnants and dysfunctional knobs It's been broken forever (i.e. it's not scheduling in a power aware fashion), as reported by Suresh and others sending patches, and nobody cares enough to fix it properly ... so remove it to make space free for something better. 
There are various problems with the code as it stands today, first and foremost the user interface, which is bound to topology levels and has multiple values per level. This results in a state explosion which the administrator or distro needs to master and almost nobody does. Furthermore, large configuration state spaces aren't good: it means the thing doesn't just work right because it's either under so many impossible-to-meet constraints, or even if there's an achievable state workloads have to be aware of it precisely and can never meet it for dynamic workloads. So pushing this kind of decision to user-space was a bad idea even with a single knob - it's exponentially worse with knobs on every node of the topology. There is a proposal to replace the user interface with a single 3-state knob: sched_balance_policy := { performance, power, auto } where 'auto' would be the preferred default which looks at things like Battery/AC mode and possible cpufreq state or whatever the hw exposes to show us power use expectations - but there's been no progress on it in the past many months. Aside from that, the actual implementation of the various knobs is known to be broken. There have been sporadic attempts at fixing things but these always stop short of reaching a mergeable state. Therefore this wholesale removal with the hopes of spurring people who care to come forward once again and work on a coherent replacement. 
Signed-off-by: Peter Zijlstra Cc: Suresh Siddha Cc: Arjan van de Ven Cc: Vincent Guittot Cc: Vaidyanathan Srinivasan Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1326104915.2442.53.camel@twins Signed-off-by: Ingo Molnar --- include/linux/topology.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux/topology.h') diff --git a/include/linux/topology.h b/include/linux/topology.h index 4f59bf36f0af..09558d1daacd 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -98,7 +98,6 @@ int arch_update_cpu_topology(void); | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ | 1*SD_SHARE_CPUPOWER \ - | 0*SD_POWERSAVINGS_BALANCE \ | 1*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ | 0*SD_PREFER_SIBLING \ @@ -134,8 +133,6 @@ int arch_update_cpu_topology(void); | 0*SD_SHARE_CPUPOWER \ | 1*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ - | sd_balance_for_mc_power() \ - | sd_power_saving_flags() \ , \ .last_balance = jiffies, \ .balance_interval = 1, \ @@ -167,8 +164,6 @@ int arch_update_cpu_topology(void); | 0*SD_SHARE_CPUPOWER \ | 0*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ - | sd_balance_for_package_power() \ - | sd_power_saving_flags() \ , \ .last_balance = jiffies, \ .balance_interval = 1, \ -- cgit v1.2.3