|
@@ -53,6 +53,12 @@
|
|
|
|
|
|
#include "rtmutex_common.h"
|
|
|
|
|
|
+#ifdef CONFIG_DEBUG_RT_MUTEXES
|
|
|
+# include "rtmutex-debug.h"
|
|
|
+#else
|
|
|
+# include "rtmutex.h"
|
|
|
+#endif
|
|
|
+
|
|
|
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
|
|
|
|
|
|
/*
|
|
@@ -102,6 +108,12 @@ struct futex_q {
|
|
|
/* Optional priority inheritance state: */
|
|
|
struct futex_pi_state *pi_state;
|
|
|
struct task_struct *task;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * This waiter is used in case of requeue from a
|
|
|
+ * normal futex to a PI-futex
|
|
|
+ */
|
|
|
+ struct rt_mutex_waiter waiter;
|
|
|
};
|
|
|
|
|
|
/*
|
|
@@ -180,6 +192,9 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key)
|
|
|
if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
|
|
|
return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
|
|
|
|
|
|
+ /* Save the user address in the key */
|
|
|
+ key->uaddr = uaddr;
|
|
|
+
|
|
|
/*
|
|
|
* Private mappings are handled in a simple way.
|
|
|
*
|
|
@@ -439,7 +454,8 @@ void exit_pi_state_list(struct task_struct *curr)
|
|
|
}
|
|
|
|
|
|
static int
|
|
|
-lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
|
|
|
+lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
|
|
|
+ union futex_key *key, struct futex_pi_state **ps)
|
|
|
{
|
|
|
struct futex_pi_state *pi_state = NULL;
|
|
|
struct futex_q *this, *next;
|
|
@@ -450,7 +466,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
|
|
|
head = &hb->chain;
|
|
|
|
|
|
plist_for_each_entry_safe(this, next, head, list) {
|
|
|
- if (match_futex(&this->key, &me->key)) {
|
|
|
+ if (match_futex(&this->key, key)) {
|
|
|
/*
|
|
|
* Another waiter already exists - bump up
|
|
|
* the refcount and return its pi_state:
|
|
@@ -465,7 +481,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
|
|
|
WARN_ON(!atomic_read(&pi_state->refcount));
|
|
|
|
|
|
atomic_inc(&pi_state->refcount);
|
|
|
- me->pi_state = pi_state;
|
|
|
+ *ps = pi_state;
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
@@ -492,7 +508,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
|
|
|
rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
|
|
|
|
|
|
/* Store the key for possible exit cleanups: */
|
|
|
- pi_state->key = me->key;
|
|
|
+ pi_state->key = *key;
|
|
|
|
|
|
spin_lock_irq(&p->pi_lock);
|
|
|
WARN_ON(!list_empty(&pi_state->list));
|
|
@@ -502,7 +518,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
|
|
|
|
|
|
put_task_struct(p);
|
|
|
|
|
|
- me->pi_state = pi_state;
|
|
|
+ *ps = pi_state;
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
@@ -562,6 +578,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
|
|
|
*/
|
|
|
if (!(uval & FUTEX_OWNER_DIED)) {
|
|
|
newval = FUTEX_WAITERS | new_owner->pid;
|
|
|
+ /* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
|
|
|
+ newval |= (uval & FUTEX_WAITER_REQUEUED);
|
|
|
|
|
|
pagefault_disable();
|
|
|
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
|
|
@@ -665,6 +683,254 @@ out:
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * Called from futex_requeue_pi.
|
|
|
+ * Set FUTEX_WAITERS and FUTEX_WAITER_REQUEUED flags on the
|
|
|
+ * PI-futex value; search its associated pi_state if an owner exists
|
|
|
+ * or create a new one without owner.
|
|
|
+ */
|
|
|
+static inline int
|
|
|
+lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb,
|
|
|
+ union futex_key *key,
|
|
|
+ struct futex_pi_state **pi_state)
|
|
|
+{
|
|
|
+ u32 curval, uval, newval;
|
|
|
+
|
|
|
+retry:
|
|
|
+ /*
|
|
|
+ * We can't handle a fault cleanly because we can't
|
|
|
+ * release the locks here. Simply return the fault.
|
|
|
+ */
|
|
|
+ if (get_futex_value_locked(&curval, uaddr))
|
|
|
+ return -EFAULT;
|
|
|
+
|
|
|
+ /* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
|
|
|
+ if ((curval & (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED))
|
|
|
+ != (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED)) {
|
|
|
+ /*
|
|
|
+ * No waiters yet, we prepare the futex to have some waiters.
|
|
|
+ */
|
|
|
+
|
|
|
+ uval = curval;
|
|
|
+ newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
|
|
|
+
|
|
|
+ pagefault_disable();
|
|
|
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
|
|
|
+ pagefault_enable();
|
|
|
+
|
|
|
+ if (unlikely(curval == -EFAULT))
|
|
|
+ return -EFAULT;
|
|
|
+ if (unlikely(curval != uval))
|
|
|
+ goto retry;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!(curval & FUTEX_TID_MASK)
|
|
|
+ || lookup_pi_state(curval, hb, key, pi_state)) {
|
|
|
+ /* the futex has no owner (yet) or the lookup failed:
|
|
|
+ allocate one pi_state without owner */
|
|
|
+
|
|
|
+ *pi_state = alloc_pi_state();
|
|
|
+
|
|
|
+ /* Already stores the key: */
|
|
|
+ (*pi_state)->key = *key;
|
|
|
+
|
|
|
+ /* init the mutex without owner */
|
|
|
+ __rt_mutex_init(&(*pi_state)->pi_mutex, NULL);
|
|
|
+ }
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Wake up the first nr_wake waiters from futex1,
|
|
|
+ * and requeue the next nr_requeue waiters following hashed on
|
|
|
+ * one physical page to another physical page (PI-futex uaddr2)
|
|
|
+ */
|
|
|
+static int futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
|
|
|
+ int nr_wake, int nr_requeue, u32 *cmpval)
|
|
|
+{
|
|
|
+ union futex_key key1, key2;
|
|
|
+ struct futex_hash_bucket *hb1, *hb2;
|
|
|
+ struct plist_head *head1;
|
|
|
+ struct futex_q *this, *next;
|
|
|
+ struct futex_pi_state *pi_state2 = NULL;
|
|
|
+ struct rt_mutex_waiter *waiter, *top_waiter = NULL;
|
|
|
+ struct rt_mutex *lock2 = NULL;
|
|
|
+ int ret, drop_count = 0;
|
|
|
+
|
|
|
+ if (refill_pi_state_cache())
|
|
|
+ return -ENOMEM;
|
|
|
+
|
|
|
+retry:
|
|
|
+ /*
|
|
|
+ * First take all the futex related locks:
|
|
|
+ */
|
|
|
+ down_read(&current->mm->mmap_sem);
|
|
|
+
|
|
|
+ ret = get_futex_key(uaddr1, &key1);
|
|
|
+ if (unlikely(ret != 0))
|
|
|
+ goto out;
|
|
|
+ ret = get_futex_key(uaddr2, &key2);
|
|
|
+ if (unlikely(ret != 0))
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ hb1 = hash_futex(&key1);
|
|
|
+ hb2 = hash_futex(&key2);
|
|
|
+
|
|
|
+ double_lock_hb(hb1, hb2);
|
|
|
+
|
|
|
+ if (likely(cmpval != NULL)) {
|
|
|
+ u32 curval;
|
|
|
+
|
|
|
+ ret = get_futex_value_locked(&curval, uaddr1);
|
|
|
+
|
|
|
+ if (unlikely(ret)) {
|
|
|
+ spin_unlock(&hb1->lock);
|
|
|
+ if (hb1 != hb2)
|
|
|
+ spin_unlock(&hb2->lock);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If we would have faulted, release mmap_sem, fault
|
|
|
+ * it in and start all over again.
|
|
|
+ */
|
|
|
+ up_read(&current->mm->mmap_sem);
|
|
|
+
|
|
|
+ ret = get_user(curval, uaddr1);
|
|
|
+
|
|
|
+ if (!ret)
|
|
|
+ goto retry;
|
|
|
+
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+ if (curval != *cmpval) {
|
|
|
+ ret = -EAGAIN;
|
|
|
+ goto out_unlock;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ head1 = &hb1->chain;
|
|
|
+ plist_for_each_entry_safe(this, next, head1, list) {
|
|
|
+ if (!match_futex (&this->key, &key1))
|
|
|
+ continue;
|
|
|
+ if (++ret <= nr_wake) {
|
|
|
+ wake_futex(this);
|
|
|
+ } else {
|
|
|
+ /*
|
|
|
+ * FIRST: get and set the pi_state
|
|
|
+ */
|
|
|
+ if (!pi_state2) {
|
|
|
+ int s;
|
|
|
+ /* do this only the first time we requeue someone */
|
|
|
+ s = lookup_pi_state_for_requeue(uaddr2, hb2,
|
|
|
+ &key2, &pi_state2);
|
|
|
+ if (s) {
|
|
|
+ ret = s;
|
|
|
+ goto out_unlock;
|
|
|
+ }
|
|
|
+
|
|
|
+ lock2 = &pi_state2->pi_mutex;
|
|
|
+ spin_lock(&lock2->wait_lock);
|
|
|
+
|
|
|
+ /* Save the top waiter of the wait_list */
|
|
|
+ if (rt_mutex_has_waiters(lock2))
|
|
|
+ top_waiter = rt_mutex_top_waiter(lock2);
|
|
|
+ } else
|
|
|
+ atomic_inc(&pi_state2->refcount);
|
|
|
+
|
|
|
+
|
|
|
+ this->pi_state = pi_state2;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * SECOND: requeue futex_q to the correct hashbucket
|
|
|
+ */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If key1 and key2 hash to the same bucket, no need to
|
|
|
+ * requeue.
|
|
|
+ */
|
|
|
+ if (likely(head1 != &hb2->chain)) {
|
|
|
+ plist_del(&this->list, &hb1->chain);
|
|
|
+ plist_add(&this->list, &hb2->chain);
|
|
|
+ this->lock_ptr = &hb2->lock;
|
|
|
+#ifdef CONFIG_DEBUG_PI_LIST
|
|
|
+ this->list.plist.lock = &hb2->lock;
|
|
|
+#endif
|
|
|
+ }
|
|
|
+ this->key = key2;
|
|
|
+ get_futex_key_refs(&key2);
|
|
|
+ drop_count++;
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * THIRD: queue it to lock2
|
|
|
+ */
|
|
|
+ spin_lock_irq(&this->task->pi_lock);
|
|
|
+ waiter = &this->waiter;
|
|
|
+ waiter->task = this->task;
|
|
|
+ waiter->lock = lock2;
|
|
|
+ plist_node_init(&waiter->list_entry, this->task->prio);
|
|
|
+ plist_node_init(&waiter->pi_list_entry, this->task->prio);
|
|
|
+ plist_add(&waiter->list_entry, &lock2->wait_list);
|
|
|
+ this->task->pi_blocked_on = waiter;
|
|
|
+ spin_unlock_irq(&this->task->pi_lock);
|
|
|
+
|
|
|
+ if (ret - nr_wake >= nr_requeue)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* If we've requeued some tasks and the top_waiter of the rt_mutex
|
|
|
+ has changed, we must adjust the priority of the owner, if any */
|
|
|
+ if (drop_count) {
|
|
|
+ struct task_struct *owner = rt_mutex_owner(lock2);
|
|
|
+ if (owner &&
|
|
|
+ (top_waiter != (waiter = rt_mutex_top_waiter(lock2)))) {
|
|
|
+ int chain_walk = 0;
|
|
|
+
|
|
|
+ spin_lock_irq(&owner->pi_lock);
|
|
|
+ if (top_waiter)
|
|
|
+ plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
|
|
|
+ else
|
|
|
+ /*
|
|
|
+ * There were no waiters before the requeue,
|
|
|
+ * the flag must be updated
|
|
|
+ */
|
|
|
+ mark_rt_mutex_waiters(lock2);
|
|
|
+
|
|
|
+ plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
|
|
|
+ __rt_mutex_adjust_prio(owner);
|
|
|
+ if (owner->pi_blocked_on) {
|
|
|
+ chain_walk = 1;
|
|
|
+ get_task_struct(owner);
|
|
|
+ }
|
|
|
+
|
|
|
+ spin_unlock_irq(&owner->pi_lock);
|
|
|
+ spin_unlock(&lock2->wait_lock);
|
|
|
+
|
|
|
+ if (chain_walk)
|
|
|
+ rt_mutex_adjust_prio_chain(owner, 0, lock2, NULL,
|
|
|
+ current);
|
|
|
+ } else {
|
|
|
+ /* No owner or the top_waiter does not change */
|
|
|
+ mark_rt_mutex_waiters(lock2);
|
|
|
+ spin_unlock(&lock2->wait_lock);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+out_unlock:
|
|
|
+ spin_unlock(&hb1->lock);
|
|
|
+ if (hb1 != hb2)
|
|
|
+ spin_unlock(&hb2->lock);
|
|
|
+
|
|
|
+ /* drop_futex_key_refs() must be called outside the spinlocks. */
|
|
|
+ while (--drop_count >= 0)
|
|
|
+ drop_futex_key_refs(&key1);
|
|
|
+
|
|
|
+out:
|
|
|
+ up_read(&current->mm->mmap_sem);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Wake up all waiters hashed on the physical page that is mapped
|
|
|
* to this virtual address:
|
|
@@ -984,9 +1250,10 @@ static int unqueue_me(struct futex_q *q)
|
|
|
|
|
|
/*
|
|
|
* PI futexes can not be requeued and must remove themself from the
|
|
|
- * hash bucket. The hash bucket lock is held on entry and dropped here.
|
|
|
+ * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
|
|
|
+ * and dropped here.
|
|
|
*/
|
|
|
-static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
|
|
|
+static void unqueue_me_pi(struct futex_q *q)
|
|
|
{
|
|
|
WARN_ON(plist_node_empty(&q->list));
|
|
|
plist_del(&q->list, &q->list.plist);
|
|
@@ -995,11 +1262,65 @@ static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
|
|
|
free_pi_state(q->pi_state);
|
|
|
q->pi_state = NULL;
|
|
|
|
|
|
- spin_unlock(&hb->lock);
|
|
|
+ spin_unlock(q->lock_ptr);
|
|
|
|
|
|
drop_futex_key_refs(&q->key);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * Fixup the pi_state owner with current.
|
|
|
+ *
|
|
|
+ * The curr->mm->mmap_sem semaphore must be held; it is released at return of this
|
|
|
+ * function.
|
|
|
+ */
|
|
|
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
|
|
+ struct futex_hash_bucket *hb,
|
|
|
+ struct task_struct *curr)
|
|
|
+{
|
|
|
+ u32 newtid = curr->pid | FUTEX_WAITERS;
|
|
|
+ struct futex_pi_state *pi_state = q->pi_state;
|
|
|
+ u32 uval, curval, newval;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ /* Owner died? */
|
|
|
+ if (pi_state->owner != NULL) {
|
|
|
+ spin_lock_irq(&pi_state->owner->pi_lock);
|
|
|
+ WARN_ON(list_empty(&pi_state->list));
|
|
|
+ list_del_init(&pi_state->list);
|
|
|
+ spin_unlock_irq(&pi_state->owner->pi_lock);
|
|
|
+ } else
|
|
|
+ newtid |= FUTEX_OWNER_DIED;
|
|
|
+
|
|
|
+ pi_state->owner = curr;
|
|
|
+
|
|
|
+ spin_lock_irq(&curr->pi_lock);
|
|
|
+ WARN_ON(!list_empty(&pi_state->list));
|
|
|
+ list_add(&pi_state->list, &curr->pi_state_list);
|
|
|
+ spin_unlock_irq(&curr->pi_lock);
|
|
|
+
|
|
|
+ /* Unqueue and drop the lock */
|
|
|
+ unqueue_me_pi(q);
|
|
|
+ up_read(&curr->mm->mmap_sem);
|
|
|
+ /*
|
|
|
+ * We own it, so we have to replace the pending owner
|
|
|
+ * TID. This must be atomic as we have to preserve the
|
|
|
+ * owner died bit here.
|
|
|
+ */
|
|
|
+ ret = get_user(uval, uaddr);
|
|
|
+ while (!ret) {
|
|
|
+ newval = (uval & FUTEX_OWNER_DIED) | newtid;
|
|
|
+ newval |= (uval & FUTEX_WAITER_REQUEUED);
|
|
|
+ curval = futex_atomic_cmpxchg_inatomic(uaddr,
|
|
|
+ uval, newval);
|
|
|
+ if (curval == -EFAULT)
|
|
|
+ ret = -EFAULT;
|
|
|
+ if (curval == uval)
|
|
|
+ break;
|
|
|
+ uval = curval;
|
|
|
+ }
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
static long futex_wait_restart(struct restart_block *restart);
|
|
|
static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
{
|
|
@@ -1009,7 +1330,7 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
struct futex_q q;
|
|
|
u32 uval;
|
|
|
int ret;
|
|
|
- struct hrtimer_sleeper t;
|
|
|
+ struct hrtimer_sleeper t, *to = NULL;
|
|
|
int rem = 0;
|
|
|
|
|
|
q.pi_state = NULL;
|
|
@@ -1063,6 +1384,14 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
if (uval != val)
|
|
|
goto out_unlock_release_sem;
|
|
|
|
|
|
+ /*
|
|
|
+ * This rt_mutex_waiter structure is prepared here and will
|
|
|
+ * be used only if this task is requeued from a normal futex to
|
|
|
+ * a PI-futex with futex_requeue_pi.
|
|
|
+ */
|
|
|
+ debug_rt_mutex_init_waiter(&q.waiter);
|
|
|
+ q.waiter.task = NULL;
|
|
|
+
|
|
|
/* Only actually queue if *uaddr contained val. */
|
|
|
__queue_me(&q, hb);
|
|
|
|
|
@@ -1092,6 +1421,7 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
if (!abs_time)
|
|
|
schedule();
|
|
|
else {
|
|
|
+ to = &t;
|
|
|
hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
|
|
|
hrtimer_init_sleeper(&t, current);
|
|
|
t.timer.expires = *abs_time;
|
|
@@ -1119,6 +1449,66 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
* we are the only user of it.
|
|
|
*/
|
|
|
|
|
|
+ if (q.pi_state) {
|
|
|
+ /*
|
|
|
+ * We were woken but have been requeued on a PI-futex.
|
|
|
+ * We have to complete the lock acquisition by taking
|
|
|
+ * the rtmutex.
|
|
|
+ */
|
|
|
+
|
|
|
+ struct rt_mutex *lock = &q.pi_state->pi_mutex;
|
|
|
+
|
|
|
+ spin_lock(&lock->wait_lock);
|
|
|
+ if (unlikely(q.waiter.task)) {
|
|
|
+ remove_waiter(lock, &q.waiter);
|
|
|
+ }
|
|
|
+ spin_unlock(&lock->wait_lock);
|
|
|
+
|
|
|
+ if (rem)
|
|
|
+ ret = -ETIMEDOUT;
|
|
|
+ else
|
|
|
+ ret = rt_mutex_timed_lock(lock, to, 1);
|
|
|
+
|
|
|
+ down_read(&curr->mm->mmap_sem);
|
|
|
+ spin_lock(q.lock_ptr);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Got the lock. We might not be the anticipated owner if we
|
|
|
+ * did a lock-steal - fix up the PI-state in that case.
|
|
|
+ */
|
|
|
+ if (!ret && q.pi_state->owner != curr) {
|
|
|
+ /*
|
|
|
+ * We MUST play with the futex we were requeued on,
|
|
|
+ * NOT the current futex.
|
|
|
+ * We can retrieve it from the key of the pi_state
|
|
|
+ */
|
|
|
+ uaddr = q.pi_state->key.uaddr;
|
|
|
+
|
|
|
+ /* mmap_sem and hash_bucket lock are unlocked at
|
|
|
+ return of this function */
|
|
|
+ ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
|
|
|
+ } else {
|
|
|
+ /*
|
|
|
+ * Catch the rare case, where the lock was released
|
|
|
+ * when we were on the way back before we locked
|
|
|
+ * the hash bucket.
|
|
|
+ */
|
|
|
+ if (ret && q.pi_state->owner == curr) {
|
|
|
+ if (rt_mutex_trylock(&q.pi_state->pi_mutex))
|
|
|
+ ret = 0;
|
|
|
+ }
|
|
|
+ /* Unqueue and drop the lock */
|
|
|
+ unqueue_me_pi(&q);
|
|
|
+ up_read(&curr->mm->mmap_sem);
|
|
|
+ }
|
|
|
+
|
|
|
+ debug_rt_mutex_free_waiter(&q.waiter);
|
|
|
+
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ debug_rt_mutex_free_waiter(&q.waiter);
|
|
|
+
|
|
|
/* If we were woken (and unqueued), we succeeded, whatever. */
|
|
|
if (!unqueue_me(&q))
|
|
|
return 0;
|
|
@@ -1161,6 +1551,51 @@ static long futex_wait_restart(struct restart_block *restart)
|
|
|
}
|
|
|
|
|
|
|
|
|
+static void set_pi_futex_owner(struct futex_hash_bucket *hb,
|
|
|
+ union futex_key *key, struct task_struct *p)
|
|
|
+{
|
|
|
+ struct plist_head *head;
|
|
|
+ struct futex_q *this, *next;
|
|
|
+ struct futex_pi_state *pi_state = NULL;
|
|
|
+ struct rt_mutex *lock;
|
|
|
+
|
|
|
+ /* Search for a waiter that should already exist */
|
|
|
+
|
|
|
+ head = &hb->chain;
|
|
|
+
|
|
|
+ plist_for_each_entry_safe(this, next, head, list) {
|
|
|
+ if (match_futex (&this->key, key)) {
|
|
|
+ pi_state = this->pi_state;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ BUG_ON(!pi_state);
|
|
|
+
|
|
|
+ /* set p as pi_state's owner */
|
|
|
+ lock = &pi_state->pi_mutex;
|
|
|
+
|
|
|
+ spin_lock(&lock->wait_lock);
|
|
|
+ spin_lock_irq(&p->pi_lock);
|
|
|
+
|
|
|
+ list_add(&pi_state->list, &p->pi_state_list);
|
|
|
+ pi_state->owner = p;
|
|
|
+
|
|
|
+
|
|
|
+ /* set p as pi_mutex's owner */
|
|
|
+ debug_rt_mutex_proxy_lock(lock, p);
|
|
|
+ WARN_ON(rt_mutex_owner(lock));
|
|
|
+ rt_mutex_set_owner(lock, p, 0);
|
|
|
+ rt_mutex_deadlock_account_lock(lock, p);
|
|
|
+
|
|
|
+ plist_add(&rt_mutex_top_waiter(lock)->pi_list_entry,
|
|
|
+ &p->pi_waiters);
|
|
|
+ __rt_mutex_adjust_prio(p);
|
|
|
+
|
|
|
+ spin_unlock_irq(&p->pi_lock);
|
|
|
+ spin_unlock(&lock->wait_lock);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Userspace tried a 0 -> TID atomic transition of the futex value
|
|
|
* and failed. The kernel side here does the whole locking operation:
|
|
@@ -1175,7 +1610,7 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
struct futex_hash_bucket *hb;
|
|
|
u32 uval, newval, curval;
|
|
|
struct futex_q q;
|
|
|
- int ret, attempt = 0;
|
|
|
+ int ret, lock_held, attempt = 0;
|
|
|
|
|
|
if (refill_pi_state_cache())
|
|
|
return -ENOMEM;
|
|
@@ -1198,6 +1633,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
hb = queue_lock(&q, -1, NULL);
|
|
|
|
|
|
retry_locked:
|
|
|
+ lock_held = 0;
|
|
|
+
|
|
|
/*
|
|
|
* To avoid races, we attempt to take the lock here again
|
|
|
* (by doing a 0 -> TID atomic cmpxchg), while holding all
|
|
@@ -1216,7 +1653,16 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
|
|
|
if (!detect && 0)
|
|
|
force_sig(SIGKILL, current);
|
|
|
- ret = -EDEADLK;
|
|
|
+ /*
|
|
|
+ * Normally, this check is done in user space.
|
|
|
+ * In case of requeue, the owner may attempt to lock this futex,
|
|
|
+ * even if the ownership has already been given by the previous
|
|
|
+ * waker.
|
|
|
+ * In the usual case, this is a case of deadlock, but not in case
|
|
|
+ * of REQUEUE_PI.
|
|
|
+ */
|
|
|
+ if (!(curval & FUTEX_WAITER_REQUEUED))
|
|
|
+ ret = -EDEADLK;
|
|
|
goto out_unlock_release_sem;
|
|
|
}
|
|
|
|
|
@@ -1228,7 +1674,18 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
goto out_unlock_release_sem;
|
|
|
|
|
|
uval = curval;
|
|
|
- newval = uval | FUTEX_WAITERS;
|
|
|
+ /*
|
|
|
+ * In case of a requeue, check if there already is an owner
|
|
|
+ * If not, just take the futex.
|
|
|
+ */
|
|
|
+ if ((curval & FUTEX_WAITER_REQUEUED) && !(curval & FUTEX_TID_MASK)) {
|
|
|
+ /* set current as futex owner */
|
|
|
+ newval = curval | current->pid;
|
|
|
+ lock_held = 1;
|
|
|
+ } else
|
|
|
+ /* Set the WAITERS flag, so the owner will know it has someone
|
|
|
+ to wake at next unlock */
|
|
|
+ newval = curval | FUTEX_WAITERS;
|
|
|
|
|
|
pagefault_disable();
|
|
|
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
|
|
@@ -1239,11 +1696,16 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
if (unlikely(curval != uval))
|
|
|
goto retry_locked;
|
|
|
|
|
|
+ if (lock_held) {
|
|
|
+ set_pi_futex_owner(hb, &q.key, curr);
|
|
|
+ goto out_unlock_release_sem;
|
|
|
+ }
|
|
|
+
|
|
|
/*
|
|
|
* We dont have the lock. Look up the PI state (or create it if
|
|
|
* we are the first waiter):
|
|
|
*/
|
|
|
- ret = lookup_pi_state(uval, hb, &q);
|
|
|
+ ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
|
|
|
|
|
|
if (unlikely(ret)) {
|
|
|
/*
|
|
@@ -1306,45 +1768,10 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
* Got the lock. We might not be the anticipated owner if we
|
|
|
* did a lock-steal - fix up the PI-state in that case.
|
|
|
*/
|
|
|
- if (!ret && q.pi_state->owner != curr) {
|
|
|
- u32 newtid = current->pid | FUTEX_WAITERS;
|
|
|
-
|
|
|
- /* Owner died? */
|
|
|
- if (q.pi_state->owner != NULL) {
|
|
|
- spin_lock_irq(&q.pi_state->owner->pi_lock);
|
|
|
- WARN_ON(list_empty(&q.pi_state->list));
|
|
|
- list_del_init(&q.pi_state->list);
|
|
|
- spin_unlock_irq(&q.pi_state->owner->pi_lock);
|
|
|
- } else
|
|
|
- newtid |= FUTEX_OWNER_DIED;
|
|
|
-
|
|
|
- q.pi_state->owner = current;
|
|
|
-
|
|
|
- spin_lock_irq(&current->pi_lock);
|
|
|
- WARN_ON(!list_empty(&q.pi_state->list));
|
|
|
- list_add(&q.pi_state->list, &current->pi_state_list);
|
|
|
- spin_unlock_irq(&current->pi_lock);
|
|
|
-
|
|
|
- /* Unqueue and drop the lock */
|
|
|
- unqueue_me_pi(&q, hb);
|
|
|
- up_read(&curr->mm->mmap_sem);
|
|
|
- /*
|
|
|
- * We own it, so we have to replace the pending owner
|
|
|
- * TID. This must be atomic as we have preserve the
|
|
|
- * owner died bit here.
|
|
|
- */
|
|
|
- ret = get_user(uval, uaddr);
|
|
|
- while (!ret) {
|
|
|
- newval = (uval & FUTEX_OWNER_DIED) | newtid;
|
|
|
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
|
|
|
- uval, newval);
|
|
|
- if (curval == -EFAULT)
|
|
|
- ret = -EFAULT;
|
|
|
- if (curval == uval)
|
|
|
- break;
|
|
|
- uval = curval;
|
|
|
- }
|
|
|
- } else {
|
|
|
+ if (!ret && q.pi_state->owner != curr)
|
|
|
+ /* mmap_sem is unlocked at return of this function */
|
|
|
+ ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
|
|
|
+ else {
|
|
|
/*
|
|
|
* Catch the rare case, where the lock was released
|
|
|
* when we were on the way back before we locked
|
|
@@ -1355,7 +1782,7 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
ret = 0;
|
|
|
}
|
|
|
/* Unqueue and drop the lock */
|
|
|
- unqueue_me_pi(&q, hb);
|
|
|
+ unqueue_me_pi(&q);
|
|
|
up_read(&curr->mm->mmap_sem);
|
|
|
}
|
|
|
|
|
@@ -1724,6 +2151,8 @@ retry:
|
|
|
* userspace.
|
|
|
*/
|
|
|
mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
|
|
|
+ /* Also keep the FUTEX_WAITER_REQUEUED flag if set */
|
|
|
+ mval |= (uval & FUTEX_WAITER_REQUEUED);
|
|
|
nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
|
|
|
|
|
|
if (nval == -EFAULT)
|
|
@@ -1854,6 +2283,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
|
|
|
case FUTEX_TRYLOCK_PI:
|
|
|
ret = futex_lock_pi(uaddr, 0, timeout, 1);
|
|
|
break;
|
|
|
+ case FUTEX_CMP_REQUEUE_PI:
|
|
|
+ ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3);
|
|
|
+ break;
|
|
|
default:
|
|
|
ret = -ENOSYS;
|
|
|
}
|
|
@@ -1883,7 +2315,8 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
|
|
|
/*
|
|
|
* requeue parameter in 'utime' if op == FUTEX_REQUEUE.
|
|
|
*/
|
|
|
- if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
|
|
|
+ if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
|
|
|
+ || op == FUTEX_CMP_REQUEUE_PI)
|
|
|
val2 = (u32) (unsigned long) utime;
|
|
|
|
|
|
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
|