|
@@ -16,6 +16,9 @@
|
|
|
* Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
|
|
* Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
|
|
|
*
|
|
|
+ * PRIVATE futexes by Eric Dumazet
|
|
|
+ * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
|
|
|
+ *
|
|
|
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
|
|
|
* enough at me, Linus for the original (flawed) idea, Matthew
|
|
|
* Kirkwood for proof-of-concept implementation.
|
|
@@ -150,19 +153,26 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
|
|
|
&& key1->both.offset == key2->both.offset);
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * Get parameters which are the keys for a futex.
|
|
|
+/**
|
|
|
+ * get_futex_key - Get parameters which are the keys for a futex.
|
|
|
+ * @uaddr: virtual address of the futex
|
|
|
+ * @shared: NULL for a PROCESS_PRIVATE futex,
|
|
|
+ * ¤t->mm->mmap_sem for a PROCESS_SHARED futex
|
|
|
+ * @key: address where result is stored.
|
|
|
+ *
|
|
|
+ * Returns a negative error code or 0
|
|
|
+ * The key words are stored in *key on success.
|
|
|
*
|
|
|
* For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode,
|
|
|
* offset_within_page). For private mappings, it's (uaddr, current->mm).
|
|
|
* We can usually work out the index without swapping in the page.
|
|
|
*
|
|
|
- * Returns: 0, or negative error code.
|
|
|
- * The key words are stored in *key on success.
|
|
|
- *
|
|
|
- * Should be called with ¤t->mm->mmap_sem but NOT any spinlocks.
|
|
|
+ * fshared is NULL for PROCESS_PRIVATE futexes
|
|
|
+ * For other futexes, it points to ¤t->mm->mmap_sem and
|
|
|
+ * caller must have taken the reader lock. but NOT any spinlocks.
|
|
|
*/
|
|
|
-int get_futex_key(u32 __user *uaddr, union futex_key *key)
|
|
|
+int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
|
|
|
+ union futex_key *key)
|
|
|
{
|
|
|
unsigned long address = (unsigned long)uaddr;
|
|
|
struct mm_struct *mm = current->mm;
|
|
@@ -174,10 +184,24 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key)
|
|
|
* The futex address must be "naturally" aligned.
|
|
|
*/
|
|
|
key->both.offset = address % PAGE_SIZE;
|
|
|
- if (unlikely((key->both.offset % sizeof(u32)) != 0))
|
|
|
+ if (unlikely((address % sizeof(u32)) != 0))
|
|
|
return -EINVAL;
|
|
|
address -= key->both.offset;
|
|
|
|
|
|
+ /*
|
|
|
+ * PROCESS_PRIVATE futexes are fast.
|
|
|
+ * As the mm cannot disappear under us and the 'key' only needs
|
|
|
+ * virtual address, we dont even have to find the underlying vma.
|
|
|
+ * Note : We do have to check 'uaddr' is a valid user address,
|
|
|
+ * but access_ok() should be faster than find_vma()
|
|
|
+ */
|
|
|
+ if (!fshared) {
|
|
|
+ if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
|
|
|
+ return -EFAULT;
|
|
|
+ key->private.mm = mm;
|
|
|
+ key->private.address = address;
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
/*
|
|
|
* The futex is hashed differently depending on whether
|
|
|
* it's in a shared or private mapping. So check vma first.
|
|
@@ -205,6 +229,7 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key)
|
|
|
* mappings of _writable_ handles.
|
|
|
*/
|
|
|
if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
|
|
|
+ key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
|
|
|
key->private.mm = mm;
|
|
|
key->private.address = address;
|
|
|
return 0;
|
|
@@ -214,7 +239,7 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key)
|
|
|
* Linear file mappings are also simple.
|
|
|
*/
|
|
|
key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
|
|
|
- key->both.offset++; /* Bit 0 of offset indicates inode-based key. */
|
|
|
+ key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
|
|
|
if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
|
|
|
key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
|
|
|
+ vma->vm_pgoff);
|
|
@@ -242,16 +267,18 @@ EXPORT_SYMBOL_GPL(get_futex_key);
|
|
|
* Take a reference to the resource addressed by a key.
|
|
|
* Can be called while holding spinlocks.
|
|
|
*
|
|
|
- * NOTE: mmap_sem MUST be held between get_futex_key() and calling this
|
|
|
- * function, if it is called at all. mmap_sem keeps key->shared.inode valid.
|
|
|
*/
|
|
|
inline void get_futex_key_refs(union futex_key *key)
|
|
|
{
|
|
|
- if (key->both.ptr != 0) {
|
|
|
- if (key->both.offset & 1)
|
|
|
+ if (key->both.ptr == 0)
|
|
|
+ return;
|
|
|
+ switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
|
|
|
+ case FUT_OFF_INODE:
|
|
|
atomic_inc(&key->shared.inode->i_count);
|
|
|
- else
|
|
|
+ break;
|
|
|
+ case FUT_OFF_MMSHARED:
|
|
|
atomic_inc(&key->private.mm->mm_count);
|
|
|
+ break;
|
|
|
}
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(get_futex_key_refs);
|
|
@@ -262,11 +289,15 @@ EXPORT_SYMBOL_GPL(get_futex_key_refs);
|
|
|
*/
|
|
|
void drop_futex_key_refs(union futex_key *key)
|
|
|
{
|
|
|
- if (key->both.ptr != 0) {
|
|
|
- if (key->both.offset & 1)
|
|
|
+ if (key->both.ptr == 0)
|
|
|
+ return;
|
|
|
+ switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
|
|
|
+ case FUT_OFF_INODE:
|
|
|
iput(key->shared.inode);
|
|
|
- else
|
|
|
+ break;
|
|
|
+ case FUT_OFF_MMSHARED:
|
|
|
mmdrop(key->private.mm);
|
|
|
+ break;
|
|
|
}
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(drop_futex_key_refs);
|
|
@@ -283,28 +314,38 @@ static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * Fault handling. Called with current->mm->mmap_sem held.
|
|
|
+ * Fault handling.
|
|
|
+ * if fshared is non NULL, current->mm->mmap_sem is already held
|
|
|
*/
|
|
|
-static int futex_handle_fault(unsigned long address, int attempt)
|
|
|
+static int futex_handle_fault(unsigned long address,
|
|
|
+ struct rw_semaphore *fshared, int attempt)
|
|
|
{
|
|
|
struct vm_area_struct * vma;
|
|
|
struct mm_struct *mm = current->mm;
|
|
|
+ int ret = -EFAULT;
|
|
|
|
|
|
- if (attempt > 2 || !(vma = find_vma(mm, address)) ||
|
|
|
- vma->vm_start > address || !(vma->vm_flags & VM_WRITE))
|
|
|
- return -EFAULT;
|
|
|
+ if (attempt > 2)
|
|
|
+ return ret;
|
|
|
|
|
|
- switch (handle_mm_fault(mm, vma, address, 1)) {
|
|
|
- case VM_FAULT_MINOR:
|
|
|
- current->min_flt++;
|
|
|
- break;
|
|
|
- case VM_FAULT_MAJOR:
|
|
|
- current->maj_flt++;
|
|
|
- break;
|
|
|
- default:
|
|
|
- return -EFAULT;
|
|
|
+ if (!fshared)
|
|
|
+ down_read(&mm->mmap_sem);
|
|
|
+ vma = find_vma(mm, address);
|
|
|
+ if (vma && address >= vma->vm_start &&
|
|
|
+ (vma->vm_flags & VM_WRITE)) {
|
|
|
+ switch (handle_mm_fault(mm, vma, address, 1)) {
|
|
|
+ case VM_FAULT_MINOR:
|
|
|
+ ret = 0;
|
|
|
+ current->min_flt++;
|
|
|
+ break;
|
|
|
+ case VM_FAULT_MAJOR:
|
|
|
+ ret = 0;
|
|
|
+ current->maj_flt++;
|
|
|
+ break;
|
|
|
+ }
|
|
|
}
|
|
|
- return 0;
|
|
|
+ if (!fshared)
|
|
|
+ up_read(&mm->mmap_sem);
|
|
|
+ return ret;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -647,7 +688,8 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
|
|
|
* Wake up all waiters hashed on the physical page that is mapped
|
|
|
* to this virtual address:
|
|
|
*/
|
|
|
-static int futex_wake(u32 __user *uaddr, int nr_wake)
|
|
|
+static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
|
|
|
+ int nr_wake)
|
|
|
{
|
|
|
struct futex_hash_bucket *hb;
|
|
|
struct futex_q *this, *next;
|
|
@@ -655,9 +697,10 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
|
|
|
union futex_key key;
|
|
|
int ret;
|
|
|
|
|
|
- down_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ down_read(fshared);
|
|
|
|
|
|
- ret = get_futex_key(uaddr, &key);
|
|
|
+ ret = get_futex_key(uaddr, fshared, &key);
|
|
|
if (unlikely(ret != 0))
|
|
|
goto out;
|
|
|
|
|
@@ -679,7 +722,8 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
|
|
|
|
|
|
spin_unlock(&hb->lock);
|
|
|
out:
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
@@ -746,7 +790,9 @@ retry:
|
|
|
* and requeue the next nr_requeue waiters following hashed on
|
|
|
* one physical page to another physical page (PI-futex uaddr2)
|
|
|
*/
|
|
|
-static int futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
|
|
|
+static int futex_requeue_pi(u32 __user *uaddr1,
|
|
|
+ struct rw_semaphore *fshared,
|
|
|
+ u32 __user *uaddr2,
|
|
|
int nr_wake, int nr_requeue, u32 *cmpval)
|
|
|
{
|
|
|
union futex_key key1, key2;
|
|
@@ -765,12 +811,13 @@ retry:
|
|
|
/*
|
|
|
* First take all the futex related locks:
|
|
|
*/
|
|
|
- down_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ down_read(fshared);
|
|
|
|
|
|
- ret = get_futex_key(uaddr1, &key1);
|
|
|
+ ret = get_futex_key(uaddr1, fshared, &key1);
|
|
|
if (unlikely(ret != 0))
|
|
|
goto out;
|
|
|
- ret = get_futex_key(uaddr2, &key2);
|
|
|
+ ret = get_futex_key(uaddr2, fshared, &key2);
|
|
|
if (unlikely(ret != 0))
|
|
|
goto out;
|
|
|
|
|
@@ -793,7 +840,8 @@ retry:
|
|
|
* If we would have faulted, release mmap_sem, fault
|
|
|
* it in and start all over again.
|
|
|
*/
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
|
|
|
ret = get_user(curval, uaddr1);
|
|
|
|
|
@@ -927,7 +975,8 @@ out_unlock:
|
|
|
drop_futex_key_refs(&key1);
|
|
|
|
|
|
out:
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
@@ -936,7 +985,8 @@ out:
|
|
|
* to this virtual address:
|
|
|
*/
|
|
|
static int
|
|
|
-futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2,
|
|
|
+futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
|
|
|
+ u32 __user *uaddr2,
|
|
|
int nr_wake, int nr_wake2, int op)
|
|
|
{
|
|
|
union futex_key key1, key2;
|
|
@@ -946,12 +996,13 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2,
|
|
|
int ret, op_ret, attempt = 0;
|
|
|
|
|
|
retryfull:
|
|
|
- down_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ down_read(fshared);
|
|
|
|
|
|
- ret = get_futex_key(uaddr1, &key1);
|
|
|
+ ret = get_futex_key(uaddr1, fshared, &key1);
|
|
|
if (unlikely(ret != 0))
|
|
|
goto out;
|
|
|
- ret = get_futex_key(uaddr2, &key2);
|
|
|
+ ret = get_futex_key(uaddr2, fshared, &key2);
|
|
|
if (unlikely(ret != 0))
|
|
|
goto out;
|
|
|
|
|
@@ -991,11 +1042,10 @@ retry:
|
|
|
* still holding the mmap_sem.
|
|
|
*/
|
|
|
if (attempt++) {
|
|
|
- if (futex_handle_fault((unsigned long)uaddr2,
|
|
|
- attempt)) {
|
|
|
- ret = -EFAULT;
|
|
|
+ ret = futex_handle_fault((unsigned long)uaddr2,
|
|
|
+ fshared, attempt);
|
|
|
+ if (ret)
|
|
|
goto out;
|
|
|
- }
|
|
|
goto retry;
|
|
|
}
|
|
|
|
|
@@ -1003,7 +1053,8 @@ retry:
|
|
|
* If we would have faulted, release mmap_sem,
|
|
|
* fault it in and start all over again.
|
|
|
*/
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
|
|
|
ret = get_user(dummy, uaddr2);
|
|
|
if (ret)
|
|
@@ -1040,7 +1091,8 @@ retry:
|
|
|
if (hb1 != hb2)
|
|
|
spin_unlock(&hb2->lock);
|
|
|
out:
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
@@ -1048,7 +1100,8 @@ out:
|
|
|
* Requeue all waiters hashed on one physical page to another
|
|
|
* physical page.
|
|
|
*/
|
|
|
-static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
|
|
|
+static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
|
|
|
+ u32 __user *uaddr2,
|
|
|
int nr_wake, int nr_requeue, u32 *cmpval)
|
|
|
{
|
|
|
union futex_key key1, key2;
|
|
@@ -1058,12 +1111,13 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
|
|
|
int ret, drop_count = 0;
|
|
|
|
|
|
retry:
|
|
|
- down_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ down_read(fshared);
|
|
|
|
|
|
- ret = get_futex_key(uaddr1, &key1);
|
|
|
+ ret = get_futex_key(uaddr1, fshared, &key1);
|
|
|
if (unlikely(ret != 0))
|
|
|
goto out;
|
|
|
- ret = get_futex_key(uaddr2, &key2);
|
|
|
+ ret = get_futex_key(uaddr2, fshared, &key2);
|
|
|
if (unlikely(ret != 0))
|
|
|
goto out;
|
|
|
|
|
@@ -1086,7 +1140,8 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
|
|
|
* If we would have faulted, release mmap_sem, fault
|
|
|
* it in and start all over again.
|
|
|
*/
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
|
|
|
ret = get_user(curval, uaddr1);
|
|
|
|
|
@@ -1139,7 +1194,8 @@ out_unlock:
|
|
|
drop_futex_key_refs(&key1);
|
|
|
|
|
|
out:
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
@@ -1273,7 +1329,8 @@ static void unqueue_me_pi(struct futex_q *q)
|
|
|
* The cur->mm semaphore must be held, it is released at return of this
|
|
|
* function.
|
|
|
*/
|
|
|
-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
|
|
+static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
|
|
|
+ struct futex_q *q,
|
|
|
struct futex_hash_bucket *hb,
|
|
|
struct task_struct *curr)
|
|
|
{
|
|
@@ -1300,7 +1357,8 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
|
|
|
|
|
/* Unqueue and drop the lock */
|
|
|
unqueue_me_pi(q);
|
|
|
- up_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
/*
|
|
|
* We own it, so we have to replace the pending owner
|
|
|
* TID. This must be atomic as we have preserve the
|
|
@@ -1321,8 +1379,15 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * In case we must use restart_block to restart a futex_wait,
|
|
|
+ * we encode in the 'arg3' shared capability
|
|
|
+ */
|
|
|
+#define ARG3_SHARED 1
|
|
|
+
|
|
|
static long futex_wait_restart(struct restart_block *restart);
|
|
|
-static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
+static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
|
|
|
+ u32 val, ktime_t *abs_time)
|
|
|
{
|
|
|
struct task_struct *curr = current;
|
|
|
DECLARE_WAITQUEUE(wait, curr);
|
|
@@ -1335,9 +1400,10 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
|
|
|
q.pi_state = NULL;
|
|
|
retry:
|
|
|
- down_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ down_read(fshared);
|
|
|
|
|
|
- ret = get_futex_key(uaddr, &q.key);
|
|
|
+ ret = get_futex_key(uaddr, fshared, &q.key);
|
|
|
if (unlikely(ret != 0))
|
|
|
goto out_release_sem;
|
|
|
|
|
@@ -1360,8 +1426,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
* a wakeup when *uaddr != val on entry to the syscall. This is
|
|
|
* rare, but normal.
|
|
|
*
|
|
|
- * We hold the mmap semaphore, so the mapping cannot have changed
|
|
|
- * since we looked it up in get_futex_key.
|
|
|
+ * for shared futexes, we hold the mmap semaphore, so the mapping
|
|
|
+ * cannot have changed since we looked it up in get_futex_key.
|
|
|
*/
|
|
|
ret = get_futex_value_locked(&uval, uaddr);
|
|
|
|
|
@@ -1372,7 +1438,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
* If we would have faulted, release mmap_sem, fault it in and
|
|
|
* start all over again.
|
|
|
*/
|
|
|
- up_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
|
|
|
ret = get_user(uval, uaddr);
|
|
|
|
|
@@ -1399,7 +1466,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
* Now the futex is queued and we have checked the data, we
|
|
|
* don't want to hold mmap_sem while we sleep.
|
|
|
*/
|
|
|
- up_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
|
|
|
/*
|
|
|
* There might have been scheduling since the queue_me(), as we
|
|
@@ -1469,7 +1537,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
else
|
|
|
ret = rt_mutex_timed_lock(lock, to, 1);
|
|
|
|
|
|
- down_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ down_read(fshared);
|
|
|
spin_lock(q.lock_ptr);
|
|
|
|
|
|
/*
|
|
@@ -1486,7 +1555,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
|
|
|
/* mmap_sem and hash_bucket lock are unlocked at
|
|
|
return of this function */
|
|
|
- ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
|
|
|
+ ret = fixup_pi_state_owner(uaddr, fshared,
|
|
|
+ &q, hb, curr);
|
|
|
} else {
|
|
|
/*
|
|
|
* Catch the rare case, where the lock was released
|
|
@@ -1499,7 +1569,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
}
|
|
|
/* Unqueue and drop the lock */
|
|
|
unqueue_me_pi(&q);
|
|
|
- up_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
}
|
|
|
|
|
|
debug_rt_mutex_free_waiter(&q.waiter);
|
|
@@ -1528,6 +1599,9 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
restart->arg0 = (unsigned long)uaddr;
|
|
|
restart->arg1 = (unsigned long)val;
|
|
|
restart->arg2 = (unsigned long)abs_time;
|
|
|
+ restart->arg3 = 0;
|
|
|
+ if (fshared)
|
|
|
+ restart->arg3 |= ARG3_SHARED;
|
|
|
return -ERESTART_RESTARTBLOCK;
|
|
|
}
|
|
|
|
|
@@ -1535,7 +1609,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time)
|
|
|
queue_unlock(&q, hb);
|
|
|
|
|
|
out_release_sem:
|
|
|
- up_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
@@ -1545,9 +1620,12 @@ static long futex_wait_restart(struct restart_block *restart)
|
|
|
u32 __user *uaddr = (u32 __user *)restart->arg0;
|
|
|
u32 val = (u32)restart->arg1;
|
|
|
ktime_t *abs_time = (ktime_t *)restart->arg2;
|
|
|
+ struct rw_semaphore *fshared = NULL;
|
|
|
|
|
|
restart->fn = do_no_restart_syscall;
|
|
|
- return (long)futex_wait(uaddr, val, abs_time);
|
|
|
+ if (restart->arg3 & ARG3_SHARED)
|
|
|
+ fshared = ¤t->mm->mmap_sem;
|
|
|
+ return (long)futex_wait(uaddr, fshared, val, abs_time);
|
|
|
}
|
|
|
|
|
|
|
|
@@ -1602,8 +1680,8 @@ static void set_pi_futex_owner(struct futex_hash_bucket *hb,
|
|
|
* if there are waiters then it will block, it does PI, etc. (Due to
|
|
|
* races the kernel might see a 0 value of the futex too.)
|
|
|
*/
|
|
|
-static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
- int trylock)
|
|
|
+static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
|
|
|
+ int detect, ktime_t *time, int trylock)
|
|
|
{
|
|
|
struct hrtimer_sleeper timeout, *to = NULL;
|
|
|
struct task_struct *curr = current;
|
|
@@ -1624,9 +1702,10 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
|
|
|
q.pi_state = NULL;
|
|
|
retry:
|
|
|
- down_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ down_read(fshared);
|
|
|
|
|
|
- ret = get_futex_key(uaddr, &q.key);
|
|
|
+ ret = get_futex_key(uaddr, fshared, &q.key);
|
|
|
if (unlikely(ret != 0))
|
|
|
goto out_release_sem;
|
|
|
|
|
@@ -1747,7 +1826,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
* Now the futex is queued and we have checked the data, we
|
|
|
* don't want to hold mmap_sem while we sleep.
|
|
|
*/
|
|
|
- up_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
|
|
|
WARN_ON(!q.pi_state);
|
|
|
/*
|
|
@@ -1761,7 +1841,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
ret = ret ? 0 : -EWOULDBLOCK;
|
|
|
}
|
|
|
|
|
|
- down_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ down_read(fshared);
|
|
|
spin_lock(q.lock_ptr);
|
|
|
|
|
|
/*
|
|
@@ -1770,7 +1851,7 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
*/
|
|
|
if (!ret && q.pi_state->owner != curr)
|
|
|
/* mmap_sem is unlocked at return of this function */
|
|
|
- ret = fixup_pi_state_owner(uaddr, &q, hb, curr);
|
|
|
+ ret = fixup_pi_state_owner(uaddr, fshared, &q, hb, curr);
|
|
|
else {
|
|
|
/*
|
|
|
* Catch the rare case, where the lock was released
|
|
@@ -1783,7 +1864,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
}
|
|
|
/* Unqueue and drop the lock */
|
|
|
unqueue_me_pi(&q);
|
|
|
- up_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
}
|
|
|
|
|
|
if (!detect && ret == -EDEADLK && 0)
|
|
@@ -1795,7 +1877,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
queue_unlock(&q, hb);
|
|
|
|
|
|
out_release_sem:
|
|
|
- up_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
return ret;
|
|
|
|
|
|
uaddr_faulted:
|
|
@@ -1806,15 +1889,16 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
* still holding the mmap_sem.
|
|
|
*/
|
|
|
if (attempt++) {
|
|
|
- if (futex_handle_fault((unsigned long)uaddr, attempt)) {
|
|
|
- ret = -EFAULT;
|
|
|
+ ret = futex_handle_fault((unsigned long)uaddr, fshared,
|
|
|
+ attempt);
|
|
|
+ if (ret)
|
|
|
goto out_unlock_release_sem;
|
|
|
- }
|
|
|
goto retry_locked;
|
|
|
}
|
|
|
|
|
|
queue_unlock(&q, hb);
|
|
|
- up_read(&curr->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
|
|
|
ret = get_user(uval, uaddr);
|
|
|
if (!ret && (uval != -EFAULT))
|
|
@@ -1828,7 +1912,7 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time,
|
|
|
* This is the in-kernel slowpath: we look up the PI state (if any),
|
|
|
* and do the rt-mutex unlock.
|
|
|
*/
|
|
|
-static int futex_unlock_pi(u32 __user *uaddr)
|
|
|
+static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
|
|
|
{
|
|
|
struct futex_hash_bucket *hb;
|
|
|
struct futex_q *this, *next;
|
|
@@ -1848,9 +1932,10 @@ retry:
|
|
|
/*
|
|
|
* First take all the futex related locks:
|
|
|
*/
|
|
|
- down_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ down_read(fshared);
|
|
|
|
|
|
- ret = get_futex_key(uaddr, &key);
|
|
|
+ ret = get_futex_key(uaddr, fshared, &key);
|
|
|
if (unlikely(ret != 0))
|
|
|
goto out;
|
|
|
|
|
@@ -1909,7 +1994,8 @@ retry_locked:
|
|
|
out_unlock:
|
|
|
spin_unlock(&hb->lock);
|
|
|
out:
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
|
|
|
return ret;
|
|
|
|
|
@@ -1921,15 +2007,16 @@ pi_faulted:
|
|
|
* still holding the mmap_sem.
|
|
|
*/
|
|
|
if (attempt++) {
|
|
|
- if (futex_handle_fault((unsigned long)uaddr, attempt)) {
|
|
|
- ret = -EFAULT;
|
|
|
+ ret = futex_handle_fault((unsigned long)uaddr, fshared,
|
|
|
+ attempt);
|
|
|
+ if (ret)
|
|
|
goto out_unlock;
|
|
|
- }
|
|
|
goto retry_locked;
|
|
|
}
|
|
|
|
|
|
spin_unlock(&hb->lock);
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ if (fshared)
|
|
|
+ up_read(fshared);
|
|
|
|
|
|
ret = get_user(uval, uaddr);
|
|
|
if (!ret && (uval != -EFAULT))
|
|
@@ -1981,6 +2068,7 @@ static int futex_fd(u32 __user *uaddr, int signal)
|
|
|
struct futex_q *q;
|
|
|
struct file *filp;
|
|
|
int ret, err;
|
|
|
+ struct rw_semaphore *fshared;
|
|
|
static unsigned long printk_interval;
|
|
|
|
|
|
if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
|
|
@@ -2022,11 +2110,12 @@ static int futex_fd(u32 __user *uaddr, int signal)
|
|
|
}
|
|
|
q->pi_state = NULL;
|
|
|
|
|
|
- down_read(¤t->mm->mmap_sem);
|
|
|
- err = get_futex_key(uaddr, &q->key);
|
|
|
+ fshared = ¤t->mm->mmap_sem;
|
|
|
+ down_read(fshared);
|
|
|
+ err = get_futex_key(uaddr, fshared, &q->key);
|
|
|
|
|
|
if (unlikely(err != 0)) {
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ up_read(fshared);
|
|
|
kfree(q);
|
|
|
goto error;
|
|
|
}
|
|
@@ -2038,7 +2127,7 @@ static int futex_fd(u32 __user *uaddr, int signal)
|
|
|
filp->private_data = q;
|
|
|
|
|
|
queue_me(q, ret, filp);
|
|
|
- up_read(¤t->mm->mmap_sem);
|
|
|
+ up_read(fshared);
|
|
|
|
|
|
/* Now we map fd to filp, so userspace can access it */
|
|
|
fd_install(ret, filp);
|
|
@@ -2167,7 +2256,7 @@ retry:
|
|
|
*/
|
|
|
if (!pi) {
|
|
|
if (uval & FUTEX_WAITERS)
|
|
|
- futex_wake(uaddr, 1);
|
|
|
+ futex_wake(uaddr, &curr->mm->mmap_sem, 1);
|
|
|
}
|
|
|
}
|
|
|
return 0;
|
|
@@ -2223,7 +2312,8 @@ void exit_robust_list(struct task_struct *curr)
|
|
|
return;
|
|
|
|
|
|
if (pending)
|
|
|
- handle_futex_death((void __user *)pending + futex_offset, curr, pip);
|
|
|
+ handle_futex_death((void __user *)pending + futex_offset,
|
|
|
+ curr, pip);
|
|
|
|
|
|
while (entry != &head->list) {
|
|
|
/*
|
|
@@ -2253,38 +2343,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
|
|
|
u32 __user *uaddr2, u32 val2, u32 val3)
|
|
|
{
|
|
|
int ret;
|
|
|
+ int cmd = op & FUTEX_CMD_MASK;
|
|
|
+ struct rw_semaphore *fshared = NULL;
|
|
|
+
|
|
|
+ if (!(op & FUTEX_PRIVATE_FLAG))
|
|
|
+ fshared = ¤t->mm->mmap_sem;
|
|
|
|
|
|
- switch (op) {
|
|
|
+ switch (cmd) {
|
|
|
case FUTEX_WAIT:
|
|
|
- ret = futex_wait(uaddr, val, timeout);
|
|
|
+ ret = futex_wait(uaddr, fshared, val, timeout);
|
|
|
break;
|
|
|
case FUTEX_WAKE:
|
|
|
- ret = futex_wake(uaddr, val);
|
|
|
+ ret = futex_wake(uaddr, fshared, val);
|
|
|
break;
|
|
|
case FUTEX_FD:
|
|
|
/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
|
|
|
ret = futex_fd(uaddr, val);
|
|
|
break;
|
|
|
case FUTEX_REQUEUE:
|
|
|
- ret = futex_requeue(uaddr, uaddr2, val, val2, NULL);
|
|
|
+ ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
|
|
|
break;
|
|
|
case FUTEX_CMP_REQUEUE:
|
|
|
- ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
|
|
|
+ ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3);
|
|
|
break;
|
|
|
case FUTEX_WAKE_OP:
|
|
|
- ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
|
|
|
+ ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
|
|
|
break;
|
|
|
case FUTEX_LOCK_PI:
|
|
|
- ret = futex_lock_pi(uaddr, val, timeout, 0);
|
|
|
+ ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
|
|
|
break;
|
|
|
case FUTEX_UNLOCK_PI:
|
|
|
- ret = futex_unlock_pi(uaddr);
|
|
|
+ ret = futex_unlock_pi(uaddr, fshared);
|
|
|
break;
|
|
|
case FUTEX_TRYLOCK_PI:
|
|
|
- ret = futex_lock_pi(uaddr, 0, timeout, 1);
|
|
|
+ ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
|
|
|
break;
|
|
|
case FUTEX_CMP_REQUEUE_PI:
|
|
|
- ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3);
|
|
|
+ ret = futex_requeue_pi(uaddr, fshared, uaddr2, val, val2, &val3);
|
|
|
break;
|
|
|
default:
|
|
|
ret = -ENOSYS;
|
|
@@ -2300,23 +2395,24 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
|
|
|
struct timespec ts;
|
|
|
ktime_t t, *tp = NULL;
|
|
|
u32 val2 = 0;
|
|
|
+ int cmd = op & FUTEX_CMD_MASK;
|
|
|
|
|
|
- if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
|
|
|
+ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
|
|
|
if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
|
|
|
return -EFAULT;
|
|
|
if (!timespec_valid(&ts))
|
|
|
return -EINVAL;
|
|
|
|
|
|
t = timespec_to_ktime(ts);
|
|
|
- if (op == FUTEX_WAIT)
|
|
|
+ if (cmd == FUTEX_WAIT)
|
|
|
t = ktime_add(ktime_get(), t);
|
|
|
tp = &t;
|
|
|
}
|
|
|
/*
|
|
|
- * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
|
|
|
+ * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
|
|
|
*/
|
|
|
- if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
|
|
|
- || op == FUTEX_CMP_REQUEUE_PI)
|
|
|
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE
|
|
|
+ || cmd == FUTEX_CMP_REQUEUE_PI)
|
|
|
val2 = (u32) (unsigned long) utime;
|
|
|
|
|
|
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
|