summaryrefslogtreecommitdiff
path: root/drivers/thermal
diff options
context:
space:
mode:
authorDiwakar Tundlam <dtundlam@nvidia.com>2014-03-06 18:12:08 -0800
committerDiwakar Tundlam <dtundlam@nvidia.com>2014-03-10 16:17:12 -0700
commit918fa5def9fb9ec2ba608c95e06299debd2d9d89 (patch)
treee5f302490fd1046219fb12fe39c06f82f2433278 /drivers/thermal
parentf4b7b2d0d8cf10dd9a7d5befebc959897856cb38 (diff)
thermal: core: use cancel delayed work sync
This fixes an issue with race between a piece of delayed work in progress and its cancellation during sensor shutdown that happens as part of system power-down. We refactor the code a bit to make sure the cancel_sync is called outside the tz mutex to prevent deadlock. This was found during a stress test by continuously running warm-reboot in a loop. We found that eventually, we get a system hang or crash with an error message from a failed reading of the temp from the get_temp API which indicates a potential issue with the polling work function. We can accelerate the reproduction of the issue by introducing an artificial delay in the work function and forcing polling by setting a non-zero polling_delay for the thermal zone. In the actual system, the timing differences could be introduced from other unresolved errors in the I2C bus transactions which could show up during the cold and warm reboot stress tests. Bug 1463497 Change-Id: I58b8a90052850b0d67d531200d90ed2dc168e48c Signed-off-by: Diwakar Tundlam <dtundlam@nvidia.com> Cc: Paul Walmsley <pwalmsley@nvidia.com> Cc: Seema Khowala <seemaj@nvidia.com> Reviewed-on: http://git-master/r/378692 Reviewed-by: Aleksandr Frid <afrid@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Seema Khowala <seemaj@nvidia.com> Tested-by: Seema Khowala <seemaj@nvidia.com>
Diffstat (limited to 'drivers/thermal')
-rw-r--r--drivers/thermal/thermal_core.c45
1 files changed, 33 insertions, 12 deletions
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 72ac8a7770f6..3f038b1e4864 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -327,28 +327,49 @@ exit:
static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
int delay)
{
+ int jiffies;
+
+ if (delay <= 0)
+ return;
+
if (delay > 1000)
- mod_delayed_work(system_freezable_wq, &tz->poll_queue,
- round_jiffies(msecs_to_jiffies(delay)));
- else if (delay)
- mod_delayed_work(system_freezable_wq, &tz->poll_queue,
- msecs_to_jiffies(delay));
+ jiffies = round_jiffies(msecs_to_jiffies(delay));
else
- cancel_delayed_work(&tz->poll_queue);
+ jiffies = msecs_to_jiffies(delay);
+
+ mod_delayed_work(system_freezable_wq, &tz->poll_queue, jiffies);
+}
+
+/**
+ * thermal_zone_device_cancel_polling() - cancels polling of thermal zone.
+ * @tz: a valid pointer to a struct thermal_zone_device
+ *
+ * We should never call this routine while holding thermal zone mutex,
+ * or else it can potentially deadlock.
+ */
+static void thermal_zone_device_cancel_polling(struct thermal_zone_device *tz)
+{
+ cancel_delayed_work_sync(&tz->poll_queue);
}
static void monitor_thermal_zone(struct thermal_zone_device *tz)
{
+ int delay = 0;
+
mutex_lock(&tz->lock);
if (tz->passive)
- thermal_zone_device_set_polling(tz, tz->passive_delay);
- else if (tz->polling_delay)
- thermal_zone_device_set_polling(tz, tz->polling_delay);
+ delay = tz->passive_delay;
else
- thermal_zone_device_set_polling(tz, 0);
+ delay = tz->polling_delay;
+
+ if (delay > 0)
+ thermal_zone_device_set_polling(tz, delay);
mutex_unlock(&tz->lock);
+
+ if (delay <= 0)
+ thermal_zone_device_cancel_polling(tz);
}
static void handle_non_critical_trips(struct thermal_zone_device *tz,
@@ -1714,7 +1735,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
if (!tz->ops->get_temp)
- thermal_zone_device_set_polling(tz, 0);
+ thermal_zone_device_cancel_polling(tz);
thermal_zone_device_update(tz);
@@ -1780,7 +1801,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
mutex_unlock(&thermal_list_lock);
- thermal_zone_device_set_polling(tz, 0);
+ thermal_zone_device_cancel_polling(tz);
if (tz->type[0])
device_remove_file(&tz->device, &dev_attr_type);