@@ -381,15 +381,16 @@ static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
 	return NULL;
 }
 
-static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
+				      u32 uval, u32 newval)
 {
-	u32 curval;
+	int ret;
 
 	pagefault_disable();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
 	pagefault_enable();
 
-	return curval;
+	return ret;
 }
 
 static int get_futex_value_locked(u32 *dest, u32 __user *from)
@@ -674,7 +675,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 				struct task_struct *task, int set_waiters)
 {
 	int lock_taken, ret, ownerdied = 0;
-	u32 uval, newval, curval;
+	u32 uval, newval, curval, vpid = task_pid_vnr(task);
 
 retry:
 	ret = lock_taken = 0;
@@ -684,19 +685,17 @@ retry:
 	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
 	 * the locks. It will most likely not succeed.
 	 */
-	newval = task_pid_vnr(task);
+	newval = vpid;
 	if (set_waiters)
 		newval |= FUTEX_WAITERS;
 
-	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
-
-	if (unlikely(curval == -EFAULT))
+	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
 		return -EFAULT;
 
 	/*
 	 * Detect deadlocks.
 	 */
-	if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
+	if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
 		return -EDEADLK;
 
 	/*
@@ -723,14 +722,12 @@ retry:
 	 */
 	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
 		/* Keep the OWNER_DIED bit */
-		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
+		newval = (curval & ~FUTEX_TID_MASK) | vpid;
 		ownerdied = 0;
 		lock_taken = 1;
 	}
 
-	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-	if (unlikely(curval == -EFAULT))
+	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
 		return -EFAULT;
 	if (unlikely(curval != uval))
 		goto retry;
@@ -775,6 +772,24 @@ retry:
 	return ret;
 }
 
+/**
+ * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
+ * @q:	The futex_q to unqueue
+ *
+ * The q->lock_ptr must not be NULL and must be held by the caller.
+ */
+static void __unqueue_futex(struct futex_q *q)
+{
+	struct futex_hash_bucket *hb;
+
+	if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
+			|| plist_node_empty(&q->list)))
+		return;
+
+	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
+	plist_del(&q->list, &hb->chain);
+}
+
 /*
  * The hash bucket lock must be held when this is called.
  * Afterwards, the futex_q must not be accessed.
@@ -792,7 +807,7 @@ static void wake_futex(struct futex_q *q)
 	 */
 	get_task_struct(p);
 
-	plist_del(&q->list, &q->list.plist);
+	__unqueue_futex(q);
 	/*
 	 * The waiting task can free the futex_q as soon as
 	 * q->lock_ptr = NULL is written, without taking any locks. A
@@ -843,9 +858,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 
 	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
-	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-	if (curval == -EFAULT)
+	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
 		ret = -EFAULT;
 	else if (curval != uval)
 		ret = -EINVAL;
@@ -880,10 +893,8 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
 	 * There is no waiter, so we unlock the futex. The owner died
 	 * bit has not to be preserved here. We are the owner:
 	 */
-	oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
-
-	if (oldval == -EFAULT)
-		return oldval;
+	if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
+		return -EFAULT;
 	if (oldval != uval)
 		return -EAGAIN;
 
@@ -1071,9 +1082,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 		plist_del(&q->list, &hb1->chain);
 		plist_add(&q->list, &hb2->chain);
 		q->lock_ptr = &hb2->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
-		q->list.plist.spinlock = &hb2->lock;
-#endif
 	}
 	get_futex_key_refs(key2);
 	q->key = *key2;
@@ -1100,16 +1108,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 	get_futex_key_refs(key);
 	q->key = *key;
 
-	WARN_ON(plist_node_empty(&q->list));
-	plist_del(&q->list, &q->list.plist);
+	__unqueue_futex(q);
 
 	WARN_ON(!q->rt_waiter);
 	q->rt_waiter = NULL;
 
 	q->lock_ptr = &hb->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.spinlock = &hb->lock;
-#endif
 
 	wake_up_state(q->task, TASK_NORMAL);
 }
@@ -1457,9 +1461,6 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 	prio = min(current->normal_prio, MAX_RT_PRIO);
 
 	plist_node_init(&q->list, prio);
-#ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.spinlock = &hb->lock;
-#endif
 	plist_add(&q->list, &hb->chain);
 	q->task = current;
 	spin_unlock(&hb->lock);
@@ -1504,8 +1505,7 @@ retry:
 		spin_unlock(lock_ptr);
 		goto retry;
 	}
-	WARN_ON(plist_node_empty(&q->list));
-	plist_del(&q->list, &q->list.plist);
+	__unqueue_futex(q);
 
 	BUG_ON(q->pi_state);
 
@@ -1525,8 +1525,7 @@ retry:
 static void unqueue_me_pi(struct futex_q *q)
 	__releases(q->lock_ptr)
 {
-	WARN_ON(plist_node_empty(&q->list));
-	plist_del(&q->list, &q->list.plist);
+	__unqueue_futex(q);
 
 	BUG_ON(!q->pi_state);
 	free_pi_state(q->pi_state);
@@ -1578,9 +1577,7 @@ retry:
 	while (1) {
 		newval = (uval & FUTEX_OWNER_DIED) | newtid;
 
-		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-		if (curval == -EFAULT)
+		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
 			goto handle_fault;
 		if (curval == uval)
 			break;
@@ -1781,13 +1778,14 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
 	 *
 	 * The basic logical guarantee of a futex is that it blocks ONLY
 	 * if cond(var) is known to be true at the time of blocking, for
-	 * any cond.  If we queued after testing *uaddr, that would open
-	 * a race condition where we could block indefinitely with
+	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
+	 * would open a race condition where we could block indefinitely with
 	 * cond(var) false, which would violate the guarantee.
 	 *
-	 * A consequence is that futex_wait() can return zero and absorb
-	 * a wakeup when *uaddr != val on entry to the syscall.  This is
-	 * rare, but normal.
+	 * On the other hand, we insert q and release the hash-bucket only
+	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
+	 * absorb a wakeup if *uaddr does not match the desired values
+	 * while the syscall executes.
 	 */
 retry:
 	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
@@ -2046,9 +2044,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
-	u32 uval;
 	struct plist_head *head;
 	union futex_key key = FUTEX_KEY_INIT;
+	u32 uval, vpid = task_pid_vnr(current);
 	int ret;
 
 retry:
@@ -2057,7 +2055,7 @@ retry:
 	/*
 	 * We release only a lock we actually own:
 	 */
-	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
+	if ((uval & FUTEX_TID_MASK) != vpid)
 		return -EPERM;
 
 	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
@@ -2072,17 +2070,14 @@ retry:
 	 * again. If it succeeds then we can return without waking
 	 * anyone else up:
 	 */
-	if (!(uval & FUTEX_OWNER_DIED))
-		uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
-
-
-	if (unlikely(uval == -EFAULT))
+	if (!(uval & FUTEX_OWNER_DIED) &&
+	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
 		goto pi_faulted;
 	/*
 	 * Rare case: we managed to release the lock atomically,
 	 * no need to wake anyone else up:
 	 */
-	if (unlikely(uval == task_pid_vnr(current)))
+	if (unlikely(uval == vpid))
 		goto out_unlock;
 
 	/*
@@ -2167,7 +2162,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 		 * We were woken prior to requeue by a timeout or a signal.
 		 * Unqueue the futex_q and determine which it was.
 		 */
-		plist_del(&q->list, &q->list.plist);
+		plist_del(&q->list, &hb->chain);
 
 		/* Handle spurious wakeups gracefully */
 		ret = -EWOULDBLOCK;
@@ -2463,11 +2458,20 @@ retry:
 	 * userspace.
 	 */
 	mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
-	nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
-
-	if (nval == -EFAULT)
-		return -1;
-
+	/*
+	 * We are not holding a lock here, but we want to have
+	 * the pagefault_disable/enable() protection because
+	 * we want to handle the fault gracefully. If the
+	 * access fails we try to fault in the futex with R/W
+	 * verification via get_user_pages. get_user() above
+	 * does not guarantee R/W access. If that fails we
+	 * give up and leave the futex locked.
+	 */
+	if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
+		if (fault_in_user_writeable(uaddr))
+			return -1;
+		goto retry;
+	}
 	if (nval != uval)
 		goto retry;
 
@@ -2678,8 +2682,7 @@ static int __init futex_init(void)
 	 * implementation, the non-functional ones will return
 	 * -ENOSYS.
 	 */
-	curval = cmpxchg_futex_value_locked(NULL, 0, 0);
-	if (curval == -EFAULT)
+	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
 		futex_cmpxchg_enabled = 1;
 
 	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
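
A note on the recurring change above: the old cmpxchg_futex_value_locked() returned either the fetched futex value or -EFAULT through the same u32, and (u32)-EFAULT is indistinguishable from a futex word that legitimately holds that bit pattern. The new signature separates the two channels: the return value carries only the error code, and the fetched value comes back through the curval out-parameter. The following minimal userspace sketch illustrates the two conventions; cmpxchg_old(), cmpxchg_new() and futex_word are hypothetical names used only for illustration, no real fault handling is modeled, and this is not kernel code:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t futex_word = 42;	/* stand-in for a user-space futex */

/* Old convention: fetched value and error code share one u32 channel. */
static uint32_t cmpxchg_old(uint32_t *uaddr, uint32_t uval, uint32_t newval)
{
	if (!uaddr)
		return (uint32_t)-EFAULT;	/* ambiguous with a stored value */
	uint32_t cur = *uaddr;
	if (cur == uval)
		*uaddr = newval;
	return cur;
}

/* New convention: return 0 or -EFAULT, fetched value via *curval. */
static int cmpxchg_new(uint32_t *curval, uint32_t *uaddr,
		       uint32_t uval, uint32_t newval)
{
	if (!uaddr)
		return -EFAULT;
	*curval = *uaddr;
	if (*curval == uval)
		*uaddr = newval;
	return 0;
}

int main(void)
{
	uint32_t curval;

	/* Mirrors the patched call sites: test the error, then the value. */
	if (cmpxchg_new(&curval, &futex_word, 42, 7))
		return 1;		/* the kernel paths return -EFAULT here */
	printf("exchanged: old=%u new=%u\n", curval, futex_word);

	/* The flaw in the old style: 0xfffffff2 == (u32)-EFAULT. */
	futex_word = (uint32_t)-EFAULT;
	printf("old API returned 0x%x: fault or stored value?\n",
	       cmpxchg_old(&futex_word, 0, 1));
	return 0;
}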