rtmutex: Detect changes in the pi lock chain

commit 82084984383babe728e6e3c9a8e5c46278091315 upstream. When we walk the lock chain, we drop all locks after each step. So the lock chain can change under us before we reacquire the locks. That's harmless in principle as we just follow the wrong lock path. But it can lead to a false positive in the dead lock detection logic: T0 holds L0 T0 blocks on L1 held by T1 T1 blocks on L2 held by T2 T2 blocks on L3 held by T3 T4 blocks on L4 held by T4 Now we walk the chain lock T1 -> lock L2 -> adjust L2 -> unlock T1 -> lock T2 -> adjust T2 -> drop locks T2 times out and blocks on L0 Now we continue: lock T2 -> lock L0 -> deadlock detected, but it's not a deadlock at all. Brad tried to work around that in the deadlock detection logic itself, but the more I looked at it the less I liked it, because it's crystal ball magic after the fact. We actually can detect a chain change very simple: lock T1 -> lock L2 -> adjust L2 -> unlock T1 -> lock T2 -> adjust T2 -> next_lock = T2->pi_blocked_on->lock; drop locks T2 times out and blocks on L0 Now we continue: lock T2 -> if (next_lock != T2->pi_blocked_on->lock) return; So if we detect that T2 is now blocked on a different lock we stop the chain walk. That's also correct in the following scenario: lock T1 -> lock L2 -> adjust L2 -> unlock T1 -> lock T2 -> adjust T2 -> next_lock = T2->pi_blocked_on->lock; drop locks T3 times out and drops L3 T2 acquires L3 and blocks on L4 now Now we continue: lock T2 -> if (next_lock != T2->pi_blocked_on->lock) return; We don't have to follow up the chain at that point, because T2 propagated our priority up to T4 already. [ Folded a cleanup patch from peterz ] Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reported-by: Brad Mouring <bmouring@ni.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/20140605152801.930031935@linutronix.de Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Thomas Gleixner <tglx@linutronix.de> 2014-06-05 11:16:12 +0200
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2014-07-17 15:39:50 -0700
commit: 307e2e09be9993d7fe403a7310b28ab2d8e2f6ce (patch)
tree: 63c91c95088bebcce1bf4df486b17aa5312c95d0
parent: 90b421b52720b30644e104da002505f08a77c07a (diff)
1 files changed, 59 insertions, 15 deletions
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 8447256eb1bc..dbc3f89c2d34 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -141,6 +141,11 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
  */
 int max_lock_depth = 1024;
 
+static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
+{
+	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
+}
+
 /*
  * Adjust the priority chain. Also used for deadlock detection.
  * Decreases task's usage by one - may thus free the task.
@@ -149,6 +154,7 @@ int max_lock_depth = 1024;
 static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 				      int deadlock_detect,
 				      struct rt_mutex *orig_lock,
+				      struct rt_mutex *next_lock,
 				      struct rt_mutex_waiter *orig_waiter,
 				      struct task_struct *top_task)
 {
@@ -207,6 +213,18 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 		goto out_unlock_pi;
 
 	/*
+	 * We dropped all locks after taking a refcount on @task, so
+	 * the task might have moved on in the lock chain or even left
+	 * the chain completely and blocks now on an unrelated lock or
+	 * on @orig_lock.
+	 *
+	 * We stored the lock on which @task was blocked in @next_lock,
+	 * so we can detect the chain change.
+	 */
+	if (next_lock != waiter->lock)
+		goto out_unlock_pi;
+
+	/*
 	 * Drop out, when the task has no waiters. Note,
 	 * top_waiter can be NULL, when we are in the deboosting
 	 * mode!
@@ -292,11 +310,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 		__rt_mutex_adjust_prio(task);
 	}
 
+	/*
+	 * Check whether the task which owns the current lock is pi
+	 * blocked itself. If yes we store a pointer to the lock for
+	 * the lock chain change detection above. After we dropped
+	 * task->pi_lock next_lock cannot be dereferenced anymore.
+	 */
+	next_lock = task_blocked_on_lock(task);
+
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	top_waiter = rt_mutex_top_waiter(lock);
 	raw_spin_unlock(&lock->wait_lock);
 
+	/*
+	 * We reached the end of the lock chain. Stop right here. No
+	 * point to go back just to figure that out.
+	 */
+	if (!next_lock)
+		goto out_put_task;
+
 	if (!detect_deadlock && waiter != top_waiter)
 		goto out_put_task;
 
@@ -407,8 +440,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 {
 	struct task_struct *owner = rt_mutex_owner(lock);
 	struct rt_mutex_waiter *top_waiter = waiter;
-	unsigned long flags;
+	struct rt_mutex *next_lock;
 	int chain_walk = 0, res;
+	unsigned long flags;
 
 	/*
 	 * Early deadlock detection. We really don't want the task to
@@ -441,20 +475,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	if (!owner)
 		return 0;
 
+	raw_spin_lock_irqsave(&owner->pi_lock, flags);
 	if (waiter == rt_mutex_top_waiter(lock)) {
-		raw_spin_lock_irqsave(&owner->pi_lock, flags);
 		plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
 		plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
 
 		__rt_mutex_adjust_prio(owner);
 		if (owner->pi_blocked_on)
 			chain_walk = 1;
-		raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
-	}
-	else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
+	} else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
 		chain_walk = 1;
+	}
 
-	if (!chain_walk)
+	/* Store the lock on which owner is blocked or NULL */
+	next_lock = task_blocked_on_lock(owner);
+
+	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+	/*
+	 * Even if full deadlock detection is on, if the owner is not
+	 * blocked itself, we can avoid finding this out in the chain
+	 * walk.
+	 */
+	if (!chain_walk || !next_lock)
 		return 0;
 
 	/*
@@ -466,8 +508,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 
 	raw_spin_unlock(&lock->wait_lock);
 
-	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
-					 task);
+	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
+					 next_lock, waiter, task);
 
 	raw_spin_lock(&lock->wait_lock);
 
@@ -516,8 +558,8 @@ static void remove_waiter(struct rt_mutex *lock,
 {
 	int first = (waiter == rt_mutex_top_waiter(lock));
 	struct task_struct *owner = rt_mutex_owner(lock);
+	struct rt_mutex *next_lock = NULL;
 	unsigned long flags;
-	int chain_walk = 0;
 
 	raw_spin_lock_irqsave(&current->pi_lock, flags);
 	plist_del(&waiter->list_entry, &lock->wait_list);
@@ -541,15 +583,15 @@ static void remove_waiter(struct rt_mutex *lock,
 		}
 		__rt_mutex_adjust_prio(owner);
 
-		if (owner->pi_blocked_on)
-			chain_walk = 1;
+		/* Store the lock on which owner is blocked or NULL */
+		next_lock = task_blocked_on_lock(owner);
 
 		raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
 	}
 
 	WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
 
-	if (!chain_walk)
+	if (!next_lock)
 		return;
 
 	/* gets dropped in rt_mutex_adjust_prio_chain()! */
@@ -557,7 +599,7 @@ static void remove_waiter(struct rt_mutex *lock,
 
 	raw_spin_unlock(&lock->wait_lock);
 
-	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
+	rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);
 
 	raw_spin_lock(&lock->wait_lock);
 }
@@ -570,6 +612,7 @@ static void remove_waiter(struct rt_mutex *lock,
 void rt_mutex_adjust_pi(struct task_struct *task)
 {
 	struct rt_mutex_waiter *waiter;
+	struct rt_mutex *next_lock;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
@@ -579,12 +622,13 @@ void rt_mutex_adjust_pi(struct task_struct *task)
 		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 		return;
 	}
-
+	next_lock = waiter->lock;
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	/* gets dropped in rt_mutex_adjust_prio_chain()! */
 	get_task_struct(task);
-	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
+
+	rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
 }
 
 /**
author	Thomas Gleixner <tglx@linutronix.de>	2014-06-05 11:16:12 +0200
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2014-07-17 15:39:50 -0700
commit	307e2e09be9993d7fe403a7310b28ab2d8e2f6ce (patch)
tree	63c91c95088bebcce1bf4df486b17aa5312c95d0
parent	90b421b52720b30644e104da002505f08a77c07a (diff)