summary | refs | log | tree | commit | diff
path: root/kernel/futex.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- kernel/futex.c | 231
1 files changed, 98 insertions, 133 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index fe90164aa6ec..688b6fcb79cb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -240,7 +240,7 @@ struct futex_q {
struct plist_node list;
struct task_struct *task;
- spinlock_t *lock_ptr;
+ raw_spinlock_t *lock_ptr;
union futex_key key;
struct futex_pi_state *pi_state;
struct rt_mutex_waiter *rt_waiter;
@@ -261,7 +261,7 @@ static const struct futex_q futex_q_init = {
*/
struct futex_hash_bucket {
atomic_t waiters;
- spinlock_t lock;
+ raw_spinlock_t lock;
struct plist_head chain;
} ____cacheline_aligned_in_smp;
@@ -822,13 +822,13 @@ static void get_pi_state(struct futex_pi_state *pi_state)
* Drops a reference to the pi_state object and frees or caches it
* when the last reference is gone.
*/
-static void put_pi_state(struct futex_pi_state *pi_state)
+static struct futex_pi_state *__put_pi_state(struct futex_pi_state *pi_state)
{
if (!pi_state)
- return;
+ return NULL;
if (!atomic_dec_and_test(&pi_state->refcount))
- return;
+ return NULL;
/*
* If pi_state->owner is NULL, the owner is most probably dying
@@ -848,9 +848,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
}
- if (current->pi_state_cache) {
- kfree(pi_state);
- } else {
+ if (!current->pi_state_cache) {
/*
* pi_state->list is already empty.
* clear pi_state->owner.
@@ -859,6 +857,30 @@ static void put_pi_state(struct futex_pi_state *pi_state)
pi_state->owner = NULL;
atomic_set(&pi_state->refcount, 1);
current->pi_state_cache = pi_state;
+ pi_state = NULL;
+ }
+ return pi_state;
+}
+
+static void put_pi_state(struct futex_pi_state *pi_state)
+{
+ kfree(__put_pi_state(pi_state));
+}
+
+static void put_pi_state_atomic(struct futex_pi_state *pi_state,
+ struct list_head *to_free)
+{
+ if (__put_pi_state(pi_state))
+ list_add(&pi_state->list, to_free);
+}
+
+static void free_pi_state_list(struct list_head *to_free)
+{
+ struct futex_pi_state *p, *next;
+
+ list_for_each_entry_safe(p, next, to_free, list) {
+ list_del(&p->list);
+ kfree(p);
}
}
@@ -875,6 +897,7 @@ void exit_pi_state_list(struct task_struct *curr)
struct futex_pi_state *pi_state;
struct futex_hash_bucket *hb;
union futex_key key = FUTEX_KEY_INIT;
+ LIST_HEAD(to_free);
if (!futex_cmpxchg_enabled)
return;
@@ -908,7 +931,7 @@ void exit_pi_state_list(struct task_struct *curr)
}
raw_spin_unlock_irq(&curr->pi_lock);
- spin_lock(&hb->lock);
+ raw_spin_lock(&hb->lock);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
raw_spin_lock(&curr->pi_lock);
/*
@@ -918,10 +941,8 @@ void exit_pi_state_list(struct task_struct *curr)
if (head->next != next) {
/* retain curr->pi_lock for the loop invariant */
raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
- raw_spin_unlock_irq(&curr->pi_lock);
- spin_unlock(&hb->lock);
- raw_spin_lock_irq(&curr->pi_lock);
- put_pi_state(pi_state);
+ raw_spin_unlock(&hb->lock);
+ put_pi_state_atomic(pi_state, &to_free);
continue;
}
@@ -932,7 +953,7 @@ void exit_pi_state_list(struct task_struct *curr)
raw_spin_unlock(&curr->pi_lock);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
rt_mutex_futex_unlock(&pi_state->pi_mutex);
put_pi_state(pi_state);
@@ -940,6 +961,8 @@ void exit_pi_state_list(struct task_struct *curr)
raw_spin_lock_irq(&curr->pi_lock);
}
raw_spin_unlock_irq(&curr->pi_lock);
+
+ free_pi_state_list(&to_free);
}
#endif
@@ -1426,7 +1449,7 @@ static void __unqueue_futex(struct futex_q *q)
{
struct futex_hash_bucket *hb;
- if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
+ if (WARN_ON_SMP(!q->lock_ptr || !raw_spin_is_locked(q->lock_ptr))
|| WARN_ON(plist_node_empty(&q->list)))
return;
@@ -1554,21 +1577,21 @@ static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
if (hb1 <= hb2) {
- spin_lock(&hb1->lock);
+ raw_spin_lock(&hb1->lock);
if (hb1 < hb2)
- spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
} else { /* hb1 > hb2 */
- spin_lock(&hb2->lock);
- spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock(&hb2->lock);
+ raw_spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
}
}
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
- spin_unlock(&hb1->lock);
+ raw_spin_unlock(&hb1->lock);
if (hb1 != hb2)
- spin_unlock(&hb2->lock);
+ raw_spin_unlock(&hb2->lock);
}
/*
@@ -1596,7 +1619,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
if (!hb_waiters_pending(hb))
goto out_put_key;
- spin_lock(&hb->lock);
+ raw_spin_lock(&hb->lock);
plist_for_each_entry_safe(this, next, &hb->chain, list) {
if (match_futex (&this->key, &key)) {
@@ -1615,7 +1638,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
}
}
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
wake_up_q(&wake_q);
out_put_key:
put_futex_key(&key);
@@ -1922,6 +1945,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
DEFINE_WAKE_Q(wake_q);
+ LIST_HEAD(to_free);
if (nr_wake < 0 || nr_requeue < 0)
return -EINVAL;
@@ -2149,16 +2173,6 @@ retry_private:
requeue_pi_wake_futex(this, &key2, hb2);
drop_count++;
continue;
- } else if (ret == -EAGAIN) {
- /*
- * Waiter was woken by timeout or
- * signal and has set pi_blocked_on to
- * PI_WAKEUP_INPROGRESS before we
- * tried to enqueue it on the rtmutex.
- */
- this->pi_state = NULL;
- put_pi_state(pi_state);
- continue;
} else if (ret) {
/*
* rt_mutex_start_proxy_lock() detected a
@@ -2169,7 +2183,7 @@ retry_private:
* object.
*/
this->pi_state = NULL;
- put_pi_state(pi_state);
+ put_pi_state_atomic(pi_state, &to_free);
/*
* We stop queueing more waiters and let user
* space deal with the mess.
@@ -2186,7 +2200,7 @@ retry_private:
* in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
* need to drop it here again.
*/
- put_pi_state(pi_state);
+ put_pi_state_atomic(pi_state, &to_free);
out_unlock:
double_unlock_hb(hb1, hb2);
@@ -2207,6 +2221,7 @@ out_put_keys:
out_put_key1:
put_futex_key(&key1);
out:
+ free_pi_state_list(&to_free);
return ret ? ret : task_count;
}
@@ -2230,7 +2245,7 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
q->lock_ptr = &hb->lock;
- spin_lock(&hb->lock); /* implies smp_mb(); (A) */
+ raw_spin_lock(&hb->lock); /* implies smp_mb(); (A) */
return hb;
}
@@ -2238,7 +2253,7 @@ static inline void
queue_unlock(struct futex_hash_bucket *hb)
__releases(&hb->lock)
{
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
hb_waiters_dec(hb);
}
@@ -2277,7 +2292,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
__releases(&hb->lock)
{
__queue_me(q, hb);
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
}
/**
@@ -2293,41 +2308,41 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
*/
static int unqueue_me(struct futex_q *q)
{
- spinlock_t *lock_ptr;
+ raw_spinlock_t *lock_ptr;
int ret = 0;
/* In the common case we don't take the spinlock, which is nice. */
retry:
/*
- * q->lock_ptr can change between this read and the following spin_lock.
- * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
- * optimizing lock_ptr out of the logic below.
+ * q->lock_ptr can change between this read and the following
+ * raw_spin_lock. Use READ_ONCE to forbid the compiler from reloading
+ * q->lock_ptr and optimizing lock_ptr out of the logic below.
*/
lock_ptr = READ_ONCE(q->lock_ptr);
if (lock_ptr != NULL) {
- spin_lock(lock_ptr);
+ raw_spin_lock(lock_ptr);
/*
* q->lock_ptr can change between reading it and
- * spin_lock(), causing us to take the wrong lock. This
+ * raw_spin_lock(), causing us to take the wrong lock. This
* corrects the race condition.
*
* Reasoning goes like this: if we have the wrong lock,
* q->lock_ptr must have changed (maybe several times)
- * between reading it and the spin_lock(). It can
- * change again after the spin_lock() but only if it was
- * already changed before the spin_lock(). It cannot,
+ * between reading it and the raw_spin_lock(). It can
+ * change again after the raw_spin_lock() but only if it was
+ * already changed before the raw_spin_lock(). It cannot,
* however, change back to the original value. Therefore
* we can detect whether we acquired the correct lock.
*/
if (unlikely(lock_ptr != q->lock_ptr)) {
- spin_unlock(lock_ptr);
+ raw_spin_unlock(lock_ptr);
goto retry;
}
__unqueue_futex(q);
BUG_ON(q->pi_state);
- spin_unlock(lock_ptr);
+ raw_spin_unlock(lock_ptr);
ret = 1;
}
@@ -2343,13 +2358,16 @@ retry:
static void unqueue_me_pi(struct futex_q *q)
__releases(q->lock_ptr)
{
+ struct futex_pi_state *ps;
+
__unqueue_futex(q);
BUG_ON(!q->pi_state);
- put_pi_state(q->pi_state);
+ ps = __put_pi_state(q->pi_state);
q->pi_state = NULL;
- spin_unlock(q->lock_ptr);
+ raw_spin_unlock(q->lock_ptr);
+ kfree(ps);
}
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
@@ -2482,7 +2500,7 @@ retry:
*/
handle_err:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
- spin_unlock(q->lock_ptr);
+ raw_spin_unlock(q->lock_ptr);
switch (err) {
case -EFAULT:
@@ -2500,7 +2518,7 @@ handle_err:
break;
}
- spin_lock(q->lock_ptr);
+ raw_spin_lock(q->lock_ptr);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
/*
@@ -2596,7 +2614,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
/*
* The task state is guaranteed to be set before another task can
* wake it. set_current_state() is implemented using smp_store_mb() and
- * queue_me() calls spin_unlock() upon completion, both serializing
+ * queue_me() calls raw_spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
set_current_state(TASK_INTERRUPTIBLE);
@@ -2887,15 +2905,7 @@ retry_private:
* before __rt_mutex_start_proxy_lock() is done.
*/
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
- /*
- * the migrate_disable() here disables migration in the in_atomic() fast
- * path which is enabled again in the following spin_unlock(). We have
- * one migrate_disable() pending in the slow-path which is reversed
- * after the raw_spin_unlock_irq() where we leave the atomic context.
- */
- migrate_disable();
-
- spin_unlock(q.lock_ptr);
+ raw_spin_unlock(q.lock_ptr);
/*
* __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
* such that futex_unlock_pi() is guaranteed to observe the waiter when
@@ -2903,7 +2913,6 @@ retry_private:
*/
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
- migrate_enable();
if (ret) {
if (ret == 1)
@@ -2917,7 +2926,7 @@ retry_private:
ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
cleanup:
- spin_lock(q.lock_ptr);
+ raw_spin_lock(q.lock_ptr);
/*
* If we failed to acquire the lock (deadlock/signal/timeout), we must
* first acquire the hb->lock before removing the lock from the
@@ -3018,7 +3027,7 @@ retry:
return ret;
hb = hash_futex(&key);
- spin_lock(&hb->lock);
+ raw_spin_lock(&hb->lock);
/*
* Check waiters first. We do not trust user space values at
@@ -3052,21 +3061,11 @@ retry:
* rt_waiter. Also see the WARN in wake_futex_pi().
*/
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
- /*
- * Magic trickery for now to make the RT migrate disable
- * logic happy. The following spin_unlock() happens with
- * interrupts disabled so the internal migrate_enable()
- * won't undo the migrate_disable() which was issued when
- * locking hb->lock.
- */
- migrate_disable();
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
/* drops pi_state->pi_mutex.wait_lock */
ret = wake_futex_pi(uaddr, uval, pi_state);
- migrate_enable();
-
put_pi_state(pi_state);
/*
@@ -3101,7 +3100,7 @@ retry:
* owner.
*/
if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
switch (ret) {
case -EFAULT:
goto pi_faulted;
@@ -3121,7 +3120,7 @@ retry:
ret = (curval == uval) ? 0 : -EAGAIN;
out_unlock:
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
out_putkey:
put_futex_key(&key);
return ret;
@@ -3237,7 +3236,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
struct hrtimer_sleeper timeout, *to = NULL;
struct futex_pi_state *pi_state = NULL;
struct rt_mutex_waiter rt_waiter;
- struct futex_hash_bucket *hb, *hb2;
+ struct futex_hash_bucket *hb;
union futex_key key2 = FUTEX_KEY_INIT;
struct futex_q q = futex_q_init;
int res, ret;
@@ -3295,55 +3294,20 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me(hb, &q, to);
- /*
- * On RT we must avoid races with requeue and trying to block
- * on two mutexes (hb->lock and uaddr2's rtmutex) by
- * serializing access to pi_blocked_on with pi_lock.
- */
- raw_spin_lock_irq(&current->pi_lock);
- if (current->pi_blocked_on) {
- /*
- * We have been requeued or are in the process of
- * being requeued.
- */
- raw_spin_unlock_irq(&current->pi_lock);
- } else {
- /*
- * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
- * prevents a concurrent requeue from moving us to the
- * uaddr2 rtmutex. After that we can safely acquire
- * (and possibly block on) hb->lock.
- */
- current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
- raw_spin_unlock_irq(&current->pi_lock);
-
- spin_lock(&hb->lock);
-
- /*
- * Clean up pi_blocked_on. We might leak it otherwise
- * when we succeeded with the hb->lock in the fast
- * path.
- */
- raw_spin_lock_irq(&current->pi_lock);
- current->pi_blocked_on = NULL;
- raw_spin_unlock_irq(&current->pi_lock);
-
- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
- spin_unlock(&hb->lock);
- if (ret)
- goto out_put_keys;
- }
+ raw_spin_lock(&hb->lock);
+ ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
+ raw_spin_unlock(&hb->lock);
+ if (ret)
+ goto out_put_keys;
/*
- * In order to be here, we have either been requeued, are in
- * the process of being requeued, or requeue successfully
- * acquired uaddr2 on our behalf. If pi_blocked_on was
- * non-null above, we may be racing with a requeue. Do not
- * rely on q->lock_ptr to be hb2->lock until after blocking on
- * hb->lock or hb2->lock. The futex_requeue dropped our key1
- * reference and incremented our key2 reference count.
+ * In order for us to be here, we know our q.key == key2, and since
+ * we took the hb->lock above, we also know that futex_requeue() has
+ * completed and we no longer have to concern ourselves with a wakeup
+ * race with the atomic proxy lock acquisition by the requeue code. The
+ * futex_requeue dropped our key1 reference and incremented our key2
+ * reference count.
*/
- hb2 = hash_futex(&key2);
/* Check if the requeue code acquired the second futex for us. */
if (!q.rt_waiter) {
@@ -3352,8 +3316,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* did a lock-steal - fix up the PI-state in that case.
*/
if (q.pi_state && (q.pi_state->owner != current)) {
- spin_lock(&hb2->lock);
- BUG_ON(&hb2->lock != q.lock_ptr);
+ struct futex_pi_state *ps_free;
+
+ raw_spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current);
if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
pi_state = q.pi_state;
@@ -3363,8 +3328,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* Drop the reference to the pi state which
* the requeue_pi() code acquired for us.
*/
- put_pi_state(q.pi_state);
- spin_unlock(&hb2->lock);
+ ps_free = __put_pi_state(q.pi_state);
+ raw_spin_unlock(q.lock_ptr);
+ kfree(ps_free);
}
} else {
struct rt_mutex *pi_mutex;
@@ -3378,8 +3344,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
pi_mutex = &q.pi_state->pi_mutex;
ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
- spin_lock(&hb2->lock);
- BUG_ON(&hb2->lock != q.lock_ptr);
+ raw_spin_lock(q.lock_ptr);
if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
ret = 0;
@@ -3816,7 +3781,7 @@ static int __init futex_init(void)
for (i = 0; i < futex_hashsize; i++) {
atomic_set(&futex_queues[i].waiters, 0);
plist_head_init(&futex_queues[i].chain);
- spin_lock_init(&futex_queues[i].lock);
+ raw_spin_lock_init(&futex_queues[i].lock);
}
return 0;