From d8c216cfa57e8a579f41729cbb88c97835d9ac8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 8 Jan 2011 00:29:20 +0100 Subject: cpuidle: Make cpuidle_enable_device() call poll_idle_init() The following scenario is possible with the current cpuidle code and the ACPI cpuidle driver: (1) acpi_processor_cst_has_changed() is called, (2) cpuidle_disable_device() is called, (3) cpuidle_remove_state_sysfs() is called to remove the (presumably outdated) states info from sysfs, (3) acpi_processor_get_power_info() is called, the first entry in the pr->power.states[] table is filled with zeros, (4) acpi_processor_setup_cpuidle() is called and it doesn't fill the first entry in pr->power.states[], (5) cpuidle_enable_device() is called, (6) __cpuidle_register_device() is _not_ called, since the device has already been registered, (7) Consequently, poll_idle_init() is _not_ called either, (8) cpuidle_add_state_sysfs() is called to create the sysfs attributes for the new states and it uses the bogus first table entry from acpi_processor_get_power_info() for creating state0. This problem is avoided if cpuidle_enable_device() unconditionally calls poll_idle_init(). Reported-by: Len Brown Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown cc: stable@kernel.org --- drivers/cpuidle/cpuidle.c | 82 +++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 41 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a50710843378..97df791c74cb 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -154,6 +154,45 @@ void cpuidle_resume_and_unlock(void) EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock); +#ifdef CONFIG_ARCH_HAS_CPU_RELAX +static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st) +{ + ktime_t t1, t2; + s64 diff; + int ret; + + t1 = ktime_get(); + local_irq_enable(); + while (!need_resched()) + cpu_relax(); + + t2 = ktime_get(); + diff = ktime_to_us(ktime_sub(t2, t1)); + if (diff > INT_MAX) + diff = INT_MAX; + + ret = (int) diff; + return ret; +} + +static void poll_idle_init(struct cpuidle_device *dev) +{ + struct cpuidle_state *state = &dev->states[0]; + + cpuidle_set_statedata(state, NULL); + + snprintf(state->name, CPUIDLE_NAME_LEN, "C0"); + snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); + state->exit_latency = 0; + state->target_residency = 0; + state->power_usage = -1; + state->flags = CPUIDLE_FLAG_POLL; + state->enter = poll_idle; +} +#else +static void poll_idle_init(struct cpuidle_device *dev) {} +#endif /* CONFIG_ARCH_HAS_CPU_RELAX */ + /** * cpuidle_enable_device - enables idle PM for a CPU * @dev: the CPU @@ -178,6 +217,8 @@ int cpuidle_enable_device(struct cpuidle_device *dev) return ret; } + poll_idle_init(dev); + if ((ret = cpuidle_add_state_sysfs(dev))) return ret; @@ -232,45 +273,6 @@ void cpuidle_disable_device(struct cpuidle_device *dev) EXPORT_SYMBOL_GPL(cpuidle_disable_device); -#ifdef CONFIG_ARCH_HAS_CPU_RELAX -static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st) -{ - ktime_t t1, t2; - s64 diff; - int ret; - - t1 = ktime_get(); - local_irq_enable(); - while (!need_resched()) - cpu_relax(); - - t2 = ktime_get(); - diff = ktime_to_us(ktime_sub(t2, t1)); - if (diff > INT_MAX) - diff = INT_MAX; - - ret = (int) diff; - return ret; -} - -static void poll_idle_init(struct cpuidle_device *dev) -{ - struct cpuidle_state *state = &dev->states[0]; - - cpuidle_set_statedata(state, NULL); - - snprintf(state->name, CPUIDLE_NAME_LEN, "C0"); - snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); - state->exit_latency = 0; - state->target_residency = 0; - state->power_usage = -1; - state->flags = CPUIDLE_FLAG_POLL; - state->enter = poll_idle; -} -#else -static void poll_idle_init(struct cpuidle_device *dev) {} -#endif /* CONFIG_ARCH_HAS_CPU_RELAX */ - /** * __cpuidle_register_device - internal register function called before register * and enable routines @@ -291,8 +293,6 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) init_completion(&dev->kobj_unregister); - poll_idle_init(dev); - /* * cpuidle driver should set the dev->power_specified bit * before registering the device if the driver provides -- cgit v1.2.3 From 720f1c3010db6a411358b962a2007969117840bc Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 7 Jan 2011 11:29:43 +0100 Subject: cpuidle: Rename X86 specific idle poll state[0] from C0 to POLL C0 means and is well know as "not idle". All documentation out there uses this term as "running"/"not idle" state. Also Linux userspace tools (e.g. cpufreq-aperf and turbostat) show C0 residency which there is correct, but means something totally else than cpuidle "POLL" state. Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 97df791c74cb..37e446041306 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -181,7 +181,7 @@ static void poll_idle_init(struct cpuidle_device *dev) cpuidle_set_statedata(state, NULL); - snprintf(state->name, CPUIDLE_NAME_LEN, "C0"); + snprintf(state->name, CPUIDLE_NAME_LEN, "POLL"); snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); state->exit_latency = 0; state->target_residency = 0; -- cgit v1.2.3 From d247632c08c674864d438733280422ddb7130ff8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 12 Jan 2011 02:34:59 -0500 Subject: cpuidle: delete NOP CPUIDLE_FLAG_POLL it serves no purpose Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 37e446041306..dd5f1eafd6f2 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -186,7 +186,7 @@ static void poll_idle_init(struct cpuidle_device *dev) state->exit_latency = 0; state->target_residency = 0; state->power_usage = -1; - state->flags = CPUIDLE_FLAG_POLL; + state->flags = 0; state->enter = poll_idle; } #else -- cgit v1.2.3 From f77cfe4ea21760268c0277fa3e4b02dfd2a2c2f4 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 7 Jan 2011 11:29:44 +0100 Subject: cpuidle/x86/perf: fix power:cpu_idle double end events and throw cpu_idle events from the cpuidle layer Currently intel_idle and acpi_idle driver show double cpu_idle "exit idle" events -> this patch fixes it and makes cpu_idle events throwing less complex. It also introduces cpu_idle events for all architectures which use the cpuidle subsystem, namely: - arch/arm/mach-at91/cpuidle.c - arch/arm/mach-davinci/cpuidle.c - arch/arm/mach-kirkwood/cpuidle.c - arch/arm/mach-omap2/cpuidle34xx.c - arch/drivers/acpi/processor_idle.c (for all cases, not only mwait) - arch/x86/kernel/process.c (did throw events before, but was a mess) - drivers/idle/intel_idle.c (did throw events before) Convention should be: Fire cpu_idle events inside the current pm_idle function (not somewhere down the the callee tree) to keep things easy. Current possible pm_idle functions in X86: c1e_idle, poll_idle, cpuidle_idle_call, mwait_idle, default_idle -> this is really easy is now. This affects userspace: The type field of the cpu_idle power event can now direclty get mapped to: /sys/devices/system/cpu/cpuX/cpuidle/stateX/{name,desc,usage,time,...} instead of throwing very CPU/mwait specific values. This change is not visible for the intel_idle driver. For the acpi_idle driver it should only be visible if the vendor misses out C-states in his BIOS. Another (perf timechart) patch reads out cpuidle info of cpu_idle events from: /sys/.../cpuidle/stateX/*, then the cpuidle events are mapped to the correct C-/cpuidle state again, even if e.g. vendors miss out C-states in their BIOS and for example only export C1 and C3. -> everything is fine. Signed-off-by: Thomas Renninger CC: Robert Schoene CC: Jean Pihet CC: Arjan van de Ven CC: Ingo Molnar CC: Frederic Weisbecker CC: linux-pm@lists.linux-foundation.org CC: linux-acpi@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: linux-perf-users@vger.kernel.org CC: linux-omap@vger.kernel.org Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 386888f10df0..e4855c33f897 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -96,7 +96,15 @@ static void cpuidle_idle_call(void) /* enter the state and update stats */ dev->last_state = target_state; + + trace_power_start(POWER_CSTATE, next_state, dev->cpu); + trace_cpu_idle(next_state, dev->cpu); + dev->last_residency = target_state->enter(dev, target_state); + + trace_power_end(dev->cpu); + trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); + if (dev->last_state) target_state = dev->last_state; @@ -106,8 +114,6 @@ static void cpuidle_idle_call(void) /* give the governor an opportunity to reflect on the outcome */ if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } /** -- cgit v1.2.3