From 1acdac104668a0834cfa267de9946fac7764d486 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Nov 2008 10:02:53 -0800 Subject: futex: make clock selectable for FUTEX_WAIT_BITSET FUTEX_WAIT_BITSET could be used instead of FUTEX_WAIT by setting the bit set to FUTEX_BITSET_MATCH_ANY, but FUTEX_WAIT uses CLOCK_REALTIME while FUTEX_WAIT_BITSET uses CLOCK_MONOTONIC. Add a flag to select CLOCK_REALTIME for FUTEX_WAIT_BITSET so glibc can replace the FUTEX_WAIT logic which needs to do gettimeofday() calls before and after the syscall to convert the absolute timeout to a relative timeout for FUTEX_WAIT. Signed-off-by: Thomas Gleixner Cc: Ulrich Drepper --- kernel/futex.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index e10c5c8786a6..ba0d3b83c091 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1142,12 +1142,13 @@ handle_fault: * In case we must use restart_block to restart a futex_wait, * we encode in the 'flags' shared capability */ -#define FLAGS_SHARED 1 +#define FLAGS_SHARED 0x01 +#define FLAGS_CLOCKRT 0x02 static long futex_wait_restart(struct restart_block *restart); static int futex_wait(u32 __user *uaddr, int fshared, - u32 val, ktime_t *abs_time, u32 bitset) + u32 val, ktime_t *abs_time, u32 bitset, int clockrt) { struct task_struct *curr = current; DECLARE_WAITQUEUE(wait, curr); @@ -1233,8 +1234,10 @@ static int futex_wait(u32 __user *uaddr, int fshared, slack = current->timer_slack_ns; if (rt_task(current)) slack = 0; - hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&t.timer, + clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); hrtimer_init_sleeper(&t, current); hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack); @@ -1289,6 +1292,8 @@ static int futex_wait(u32 __user *uaddr, int fshared, if (fshared) restart->futex.flags |= FLAGS_SHARED; + if (clockrt) + restart->futex.flags |= FLAGS_CLOCKRT; return -ERESTART_RESTARTBLOCK; } @@ -1312,7 +1317,8 @@ static long futex_wait_restart(struct restart_block *restart) if (restart->futex.flags & FLAGS_SHARED) fshared = 1; return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, - restart->futex.bitset); + restart->futex.bitset, + restart->futex.flags & FLAGS_CLOCKRT); } @@ -1905,18 +1911,22 @@ void exit_robust_list(struct task_struct *curr) long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { - int ret = -ENOSYS; + int clockrt, ret = -ENOSYS; int cmd = op & FUTEX_CMD_MASK; int fshared = 0; if (!(op & FUTEX_PRIVATE_FLAG)) fshared = 1; + clockrt = op & FUTEX_CLOCK_REALTIME; + if (clockrt && cmd != FUTEX_WAIT_BITSET) + return -ENOSYS; + switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; case FUTEX_WAIT_BITSET: - ret = futex_wait(uaddr, fshared, val, timeout, val3); + ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); break; case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; -- cgit v1.2.3 From 73500ac545d24610eb2cf8579ffc88957e9c5847 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Wed, 17 Dec 2008 17:29:56 -0800 Subject: futex: rename field in futex_q to clarify single waiter semantics Impact: simplify code I've tripped over the naming of this field a couple times. The futex_q uses a "waiters" list to represent a single blocked task and then calles wake_up_all(). This can lead to confusion in trying to understand the intent of the code, which is to have a single futex_q for every task waiting on a futex. This patch corrects the problem, using a single pointer to the waiting task, and an appropriate call to wake_up, rather than wake_up_all. Compile and boot tested on an 8way x86_64 machine. Signed-off-by: Darren Hart Acked-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/futex.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index ba0d3b83c091..99f8acce08bf 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -92,11 +92,12 @@ struct futex_pi_state { * A futex_q has a woken state, just like tasks have TASK_RUNNING. * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. * The order of wakup is always to make the first condition true, then - * wake up q->waiters, then make the second condition true. + * wake up q->waiter, then make the second condition true. */ struct futex_q { struct plist_node list; - wait_queue_head_t waiters; + /* There can only be a single waiter */ + wait_queue_head_t waiter; /* Which hash list lock to use: */ spinlock_t *lock_ptr; @@ -573,7 +574,7 @@ static void wake_futex(struct futex_q *q) * The lock in wake_up_all() is a crucial memory barrier after the * plist_del() and also before assigning to q->lock_ptr. */ - wake_up_all(&q->waiters); + wake_up(&q->waiter); /* * The waiting task can free the futex_q as soon as this is written, * without taking any locks. This must come last. @@ -930,7 +931,7 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) { struct futex_hash_bucket *hb; - init_waitqueue_head(&q->waiters); + init_waitqueue_head(&q->waiter); get_futex_key_refs(&q->key); hb = hash_futex(&q->key); @@ -1221,7 +1222,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, /* add_wait_queue is the barrier after __set_current_state. */ __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&q.waiters, &wait); + add_wait_queue(&q.waiter, &wait); /* * !plist_node_empty() is safe here without any lock. * q.lock_ptr != 0 is not safe, because of ordering against wakeup. -- cgit v1.2.3 From b56863630ddbdea6e22df8835f78f0b1da037103 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Thu, 18 Dec 2008 15:06:34 -0800 Subject: futex: clean up futex_(un)lock_pi fault handling Impact: cleanup Some apparently left over cruft code was complicating the fault logic: Testing if uval != -EFAULT doesn't have any meaning, get_user() sets ret to either 0 or -EFAULT, there's no need to compare uval, especially not against EFAULT which it will never be. This patch removes the superfluous test and clarifies the comment blocks. Build and boot tested on an 8way x86_64 system. Signed-off-by: Darren Hart Signed-off-by: Ingo Molnar --- kernel/futex.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 99f8acce08bf..b4f87bac91c1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1565,12 +1565,11 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, uaddr_faulted: /* - * We have to r/w *(int __user *)uaddr, but we can't modify it - * non-atomically. Therefore, if get_user below is not - * enough, we need to handle the fault ourselves, while - * still holding the mmap_sem. - * - * ... and hb->lock. :-) --ANK + * We have to r/w *(int __user *)uaddr, and we have to modify it + * atomically. Therefore, if we continue to fault after get_user() + * below, we need to handle the fault ourselves, while still holding + * the mmap_sem. This can occur if the uaddr is under contention as + * we have to drop the mmap_sem in order to call get_user(). */ queue_unlock(&q, hb); @@ -1582,7 +1581,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, } ret = get_user(uval, uaddr); - if (!ret && (uval != -EFAULT)) + if (!ret) goto retry; if (to) @@ -1676,12 +1675,11 @@ out: pi_faulted: /* - * We have to r/w *(int __user *)uaddr, but we can't modify it - * non-atomically. Therefore, if get_user below is not - * enough, we need to handle the fault ourselves, while - * still holding the mmap_sem. - * - * ... and hb->lock. --ANK + * We have to r/w *(int __user *)uaddr, and we have to modify it + * atomically. Therefore, if we continue to fault after get_user() + * below, we need to handle the fault ourselves, while still holding + * the mmap_sem. This can occur if the uaddr is under contention as + * we have to drop the mmap_sem in order to call get_user(). */ spin_unlock(&hb->lock); @@ -1694,7 +1692,7 @@ pi_faulted: } ret = get_user(uval, uaddr); - if (!ret && (uval != -EFAULT)) + if (!ret) goto retry; return ret; -- cgit v1.2.3