summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-08-16 09:13:16 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-08-16 09:13:16 -0700
commit2d63ba3e41db3ceb0d23924ed2879b910276e24c (patch)
tree97f95a8c53b8d66277d2fe1ed05a5b4d2e93eb8b
parent9da5bb24bb368567a43ac2df0e108e43d80f3564 (diff)
parenta3ee2477c45f73184a64d9c6cf97855a52732dc6 (diff)
Merge tag 'pm-5.3-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management fixes from Rafael Wysocki: "These add a check to avoid recent suspend-to-idle power regression on systems with NVMe drives where the PCIe ASPM policy is "performance" (or when the kernel is built without ASPM support), fix an issue related to frequency limits in the schedutil cpufreq governor and fix a mistake related to the PM QoS usage in the cpufreq core introduced recently. Specifics: - Disable NVMe power optimization related to suspend-to-idle added recently on systems where PCIe ASPM is not able to put PCIe links into low-power states to prevent excess power from being drawn by the system while suspended (Rafael Wysocki). - Make the schedutil governor handle frequency limits changes properly in all cases (Viresh Kumar). - Prevent the cpufreq core from treating positive values returned by dev_pm_qos_update_request() as errors (Viresh Kumar)" * tag 'pm-5.3-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: nvme-pci: Allow PCI bus-level PM to be used if ASPM is disabled PCI/ASPM: Add pcie_aspm_enabled() cpufreq: schedutil: Don't skip freq update when limits change cpufreq: dev_pm_qos_update_request() can return 1 on success
-rw-r--r--drivers/cpufreq/cpufreq.c2
-rw-r--r--drivers/nvme/host/pci.c13
-rw-r--r--drivers/pci/pcie/aspm.c20
-rw-r--r--include/linux/pci.h2
-rw-r--r--kernel/sched/cpufreq_schedutil.c14
5 files changed, 43 insertions, 8 deletions
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8dda62367816..c28ebf2810f1 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2528,7 +2528,7 @@ static int cpufreq_boost_set_sw(int state)
}
ret = dev_pm_qos_update_request(policy->max_freq_req, policy->max);
- if (ret)
+ if (ret < 0)
break;
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index db160cee42ad..108e109e99f1 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2846,7 +2846,7 @@ static int nvme_resume(struct device *dev)
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
struct nvme_ctrl *ctrl = &ndev->ctrl;
- if (pm_resume_via_firmware() || !ctrl->npss ||
+ if (ndev->last_ps == U32_MAX ||
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
nvme_reset_ctrl(ctrl);
return 0;
@@ -2859,6 +2859,8 @@ static int nvme_suspend(struct device *dev)
struct nvme_ctrl *ctrl = &ndev->ctrl;
int ret = -EBUSY;
+ ndev->last_ps = U32_MAX;
+
/*
* The platform does not remove power for a kernel managed suspend so
* use host managed nvme power settings for lowest idle power if
@@ -2866,8 +2868,14 @@ static int nvme_suspend(struct device *dev)
* shutdown. But if the firmware is involved after the suspend or the
* device does not support any non-default power states, shut down the
* device fully.
+ *
+ * If ASPM is not enabled for the device, shut down the device and allow
+ * the PCI bus layer to put it into D3 in order to take the PCIe link
+ * down, so as to allow the platform to achieve its minimum low-power
+ * state (which may not be possible if the link is up).
*/
- if (pm_suspend_via_firmware() || !ctrl->npss) {
+ if (pm_suspend_via_firmware() || !ctrl->npss ||
+ !pcie_aspm_enabled(pdev)) {
nvme_dev_disable(ndev, true);
return 0;
}
@@ -2880,7 +2888,6 @@ static int nvme_suspend(struct device *dev)
ctrl->state != NVME_CTRL_ADMIN_ONLY)
goto unfreeze;
- ndev->last_ps = 0;
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
if (ret < 0)
goto unfreeze;
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index e44af7f4d37f..464f8f92653f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -1170,6 +1170,26 @@ static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
NULL, 0644);
+/**
+ * pcie_aspm_enabled - Check if PCIe ASPM has been enabled for a device.
+ * @pdev: Target device.
+ */
+bool pcie_aspm_enabled(struct pci_dev *pdev)
+{
+ struct pci_dev *bridge = pci_upstream_bridge(pdev);
+ bool ret;
+
+ if (!bridge)
+ return false;
+
+ mutex_lock(&aspm_lock);
+ ret = bridge->link_state ? !!bridge->link_state->aspm_enabled : false;
+ mutex_unlock(&aspm_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pcie_aspm_enabled);
+
#ifdef CONFIG_PCIEASPM_DEBUG
static ssize_t link_state_show(struct device *dev,
struct device_attribute *attr,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e700d9f9f28..82e4cd1b7ac3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1567,8 +1567,10 @@ extern bool pcie_ports_native;
#ifdef CONFIG_PCIEASPM
bool pcie_aspm_support_enabled(void);
+bool pcie_aspm_enabled(struct pci_dev *pdev);
#else
static inline bool pcie_aspm_support_enabled(void) { return false; }
+static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; }
#endif
#ifdef CONFIG_PCIEAER
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 636ca6f88c8e..867b4bb6d4be 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -40,6 +40,7 @@ struct sugov_policy {
struct task_struct *thread;
bool work_in_progress;
+ bool limits_changed;
bool need_freq_update;
};
@@ -89,8 +90,11 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
!cpufreq_this_cpu_can_update(sg_policy->policy))
return false;
- if (unlikely(sg_policy->need_freq_update))
+ if (unlikely(sg_policy->limits_changed)) {
+ sg_policy->limits_changed = false;
+ sg_policy->need_freq_update = true;
return true;
+ }
delta_ns = time - sg_policy->last_freq_update_time;
@@ -437,7 +441,7 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
{
if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
- sg_policy->need_freq_update = true;
+ sg_policy->limits_changed = true;
}
static void sugov_update_single(struct update_util_data *hook, u64 time,
@@ -457,7 +461,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
if (!sugov_should_update_freq(sg_policy, time))
return;
- busy = sugov_cpu_is_busy(sg_cpu);
+ /* Limits may have changed, don't skip frequency update */
+ busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu);
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
@@ -831,6 +836,7 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_policy->last_freq_update_time = 0;
sg_policy->next_freq = 0;
sg_policy->work_in_progress = false;
+ sg_policy->limits_changed = false;
sg_policy->need_freq_update = false;
sg_policy->cached_raw_freq = 0;
@@ -879,7 +885,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
mutex_unlock(&sg_policy->work_lock);
}
- sg_policy->need_freq_update = true;
+ sg_policy->limits_changed = true;
}
struct cpufreq_governor schedutil_gov = {