@@ -252,71 +252,113 @@ static void sem_rcu_free(struct rcu_head *head)
	ipc_rcu_free(head);
}

+/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that a) sem_perm.lock is free and
+ *      b) complex_count is 0.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+	int i;
+	struct sem *sem;
+
+	if (sma->complex_count) {
+		/* The thread that increased sma->complex_count waited on
+		 * all sem->lock locks. Thus we don't need to wait again.
+		 */
+		return;
+	}
+
+	for (i = 0; i < sma->sem_nsems; i++) {
+		sem = sma->sem_base + i;
+		spin_unlock_wait(&sem->lock);
+	}
+}
+
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			      int nsops)
{
-	int locknum;
- again:
-	if (nsops == 1 && !sma->complex_count) {
-		struct sem *sem = sma->sem_base + sops->sem_num;
+	struct sem *sem;

-		/* Lock just the semaphore we are interested in. */
-		spin_lock(&sem->lock);
+	if (nsops != 1) {
+		/* Complex operation - acquire a full lock */
+		ipc_lock_object(&sma->sem_perm);

-		/*
-		 * If sma->complex_count was set while we were spinning,
-		 * we may need to look at things we did not lock here.
+		/* And wait until all simple ops that are processed
+		 * right now have dropped their locks.
		 */
-		if (unlikely(sma->complex_count)) {
-			spin_unlock(&sem->lock);
-			goto lock_array;
-		}
+		sem_wait_array(sma);
+		return -1;
+	}

+	/*
+	 * Only one semaphore affected - try to optimize locking.
+	 * The rules are:
+	 * - optimized locking is possible if no complex operation
+	 *   is either enqueued or processed right now.
+	 * - The test for enqueued complex ops is simple:
+	 *      sma->complex_count != 0
+	 * - Testing for complex ops that are processed right now is
+	 *   a bit more difficult. Complex ops acquire the full lock
+	 *   and first wait until the running simple ops have completed.
+	 *   (see above)
+	 *   Thus: If we own a simple lock and the global lock is free
+	 *   and complex_count is now 0, then it will stay 0 and
+	 *   thus just locking sem->lock is sufficient.
+	 */
+	sem = sma->sem_base + sops->sem_num;
+
+	if (sma->complex_count == 0) {
		/*
-		 * Another process is holding the global lock on the
-		 * sem_array; we cannot enter our critical section,
-		 * but have to wait for the global lock to be released.
+		 * It appears that no complex operation is around.
+		 * Acquire the per-semaphore lock.
		 */
-		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
-			spin_unlock(&sem->lock);
-			spin_unlock_wait(&sma->sem_perm.lock);
-			goto again;
+		spin_lock(&sem->lock);
+
+		/* Then check that the global lock is free */
+		if (!spin_is_locked(&sma->sem_perm.lock)) {
+			/* spin_is_locked() is not a memory barrier */
+			smp_mb();
+
+			/* Now repeat the test of complex_count:
+			 * It can't change anymore until we drop sem->lock.
+			 * Thus: if it is now 0, then it will stay 0.
+			 */
+			if (sma->complex_count == 0) {
+				/* fast path successful! */
+				return sops->sem_num;
+			}
		}
+		spin_unlock(&sem->lock);
+	}

-		locknum = sops->sem_num;
+	/* slow path: acquire the full lock */
+	ipc_lock_object(&sma->sem_perm);
+
+	if (sma->complex_count == 0) {
+		/* False alarm:
+		 * There is no complex operation, thus we can switch
+		 * back to the fast path.
+		 */
+		spin_lock(&sem->lock);
+		ipc_unlock_object(&sma->sem_perm);
+		return sops->sem_num;
	} else {
-		int i;
-		/*
-		 * Lock the semaphore array, and wait for all of the
-		 * individual semaphore locks to go away. The code
-		 * above ensures no new single-lock holders will enter
-		 * their critical section while the array lock is held.
+		/* Not a false alarm, thus complete the sequence for a
+		 * full lock.
		 */
- lock_array:
-		ipc_lock_object(&sma->sem_perm);
-		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *sem = sma->sem_base + i;
-			spin_unlock_wait(&sem->lock);
-		}
-		locknum = -1;
+		sem_wait_array(sma);
+		return -1;
	}
-	return locknum;
}

static inline void sem_unlock(struct sem_array *sma, int locknum)
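
The interplay above is easy to get wrong, so here it is in miniature: a simple op takes only its per-semaphore lock and then verifies that the global lock is free and that complex_count is still zero; a complex op takes the global lock first and then, in sem_wait_array(), waits for every per-semaphore lock to be dropped. The user-space sketch below models that protocol with POSIX spinlocks. It is an illustration only, not kernel code: the names (model_array, model_lock_one, NSLOTS, ...) are invented, pthreads has no spin_is_locked() or spin_unlock_wait() so a trylock and a lock/unlock pair stand in for them, and the bookkeeping that actually raises complex_count when a complex waiter goes to sleep is left out.

#include <pthread.h>
#include <stdatomic.h>

#define NSLOTS 16

struct model_array {
	pthread_spinlock_t global_lock;		/* stands in for sem_perm.lock */
	pthread_spinlock_t slot_lock[NSLOTS];	/* stands in for sem->lock */
	atomic_int complex_count;		/* queued complex ops (updates not shown) */
};

static void model_init(struct model_array *a)
{
	int i;

	pthread_spin_init(&a->global_lock, PTHREAD_PROCESS_PRIVATE);
	for (i = 0; i < NSLOTS; i++)
		pthread_spin_init(&a->slot_lock[i], PTHREAD_PROCESS_PRIVATE);
	atomic_init(&a->complex_count, 0);
}

/* Slow path: take the global lock, then wait for every in-flight slot
 * holder, mirroring ipc_lock_object() followed by sem_wait_array(). */
static void model_lock_all(struct model_array *a)
{
	int i;

	pthread_spin_lock(&a->global_lock);
	for (i = 0; i < NSLOTS; i++) {
		/* pthreads has no spin_unlock_wait(); locking and
		 * immediately unlocking also waits out the holder */
		pthread_spin_lock(&a->slot_lock[i]);
		pthread_spin_unlock(&a->slot_lock[i]);
	}
}

/* Fast path with fallback, same shape as sem_lock(): returns the slot
 * number when only the slot lock was taken, -1 when the global lock
 * was taken instead. */
static int model_lock_one(struct model_array *a, int slot)
{
	if (atomic_load(&a->complex_count) == 0) {
		pthread_spin_lock(&a->slot_lock[slot]);

		/* "is the global lock free?" - trylock stands in for the
		 * kernel's spin_is_locked() + smp_mb() pair */
		if (pthread_spin_trylock(&a->global_lock) == 0) {
			pthread_spin_unlock(&a->global_lock);

			/* recheck complex_count; under the kernel's rules it
			 * cannot rise while we hold the slot lock and the
			 * global lock is free */
			if (atomic_load(&a->complex_count) == 0)
				return slot;	/* fast path */
		}
		pthread_spin_unlock(&a->slot_lock[slot]);
	}

	model_lock_all(a);	/* slow path */
	return -1;
}

The property carried over from sem_lock() is the order of the fast-path checks: the slot lock is taken first, the global lock is inspected second, and complex_count is re-read last, so a concurrent model_lock_all() either finds the slot lock already held or forces the simple locker onto the slow path.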
@@ -875,6 +917,24 @@ again:
	return semop_completed;
}

+/**
+ * set_semotime(sma, sops) - set sem_otime
+ * @sma: semaphore array
+ * @sops: operations that modified the array, may be NULL
+ *
+ * sem_otime is replicated to avoid cache line thrashing.
+ * This function sets one instance to the current time.
+ */
+static void set_semotime(struct sem_array *sma, struct sembuf *sops)
+{
+	if (sops == NULL) {
+		sma->sem_base[0].sem_otime = get_seconds();
+	} else {
+		sma->sem_base[sops[0].sem_num].sem_otime =
+						get_seconds();
+	}
+}
+
/**
 * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
 * @sma: semaphore array
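
A note on the helper just added: sem_otime used to be a single field that every completing semop() wrote, so all CPUs kept bouncing the same cache line. It is now replicated into the per-semaphore structures; set_semotime() stamps exactly one copy, and a reader has to fold the copies back together (get_semotime(), used further down in this patch, plays that role; it is not part of this hunk). A small stand-alone sketch of the idea, with invented names (otime_array, model_set_otime, model_get_otime, NSLOTS) and a plain time(NULL) in place of get_seconds():

#include <stddef.h>
#include <time.h>

#define NSLOTS 16

struct otime_slot {
	time_t otime;
	char pad[64 - sizeof(time_t)];	/* keep each copy on its own cache line */
};

struct otime_array {
	struct otime_slot slots[NSLOTS];
};

/* Writer side, mirroring set_semotime(): stamp only the slot touched by
 * the first operation, or slot 0 when no slot is known. */
static void model_set_otime(struct otime_array *a, int slot)
{
	a->slots[slot < 0 ? 0 : slot].otime = time(NULL);
}

/* Reader side: fold the replicated stamps back into one value by taking
 * the newest of them. */
static time_t model_get_otime(const struct otime_array *a)
{
	time_t res = a->slots[0].otime;
	int i;

	for (i = 1; i < NSLOTS; i++)
		if (a->slots[i].otime > res)
			res = a->slots[i].otime;
	return res;
}

The padding constant and the max-fold are the assumptions in this sketch; the kernel gets the same effect by keeping a sem_otime field inside each struct sem, as the indexing in set_semotime() above shows.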
@@ -925,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
			}
		}
	}
-	if (otime) {
-		if (sops == NULL) {
-			sma->sem_base[0].sem_otime = get_seconds();
-		} else {
-			sma->sem_base[sops[0].sem_num].sem_otime =
-					get_seconds();
-		}
-	}
+	if (otime)
+		set_semotime(sma, sops);
}

-
/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
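
The two counters named in the trailing context are what user space sees through semctl(GETNCNT) and semctl(GETZCNT). Purely for reference (this demo is not part of the patch): a process that blocks on a decrement of a zero-valued semaphore shows up in semncnt, as below.

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/wait.h>

union semun { int val; struct semid_ds *buf; unsigned short *array; };

int main(void)
{
	int semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
	union semun arg = { .val = 0 };
	struct sembuf dec = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };

	if (semid < 0)
		return 1;
	semctl(semid, 0, SETVAL, arg);		/* semval = 0 */

	if (fork() == 0) {
		/* child: decrementing a zero-valued semaphore blocks,
		 * so the child is counted in semncnt */
		semop(semid, &dec, 1);
		_exit(0);
	}

	sleep(1);	/* crude: give the child time to block */
	printf("semncnt = %d\n", semctl(semid, 0, GETNCNT));	/* expect 1 */
	printf("semzcnt = %d\n", semctl(semid, 0, GETZCNT));	/* expect 0 */

	semctl(semid, 0, IPC_RMID);	/* also wakes the blocked child */
	wait(NULL);
	return 0;
}

While the child is still blocked, the parent should print semncnt = 1 and semzcnt = 0.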
@@ -1797,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,

	error = perform_atomic_semop(sma, sops, nsops, un,
					task_tgid_vnr(current));
-	if (error <= 0) {
-		if (alter && error == 0)
+	if (error == 0) {
+		/* If the operation was successful, then do
+		 * the required updates.
+		 */
+		if (alter)
			do_smart_update(sma, sops, nsops, 1, &tasks);
-
-		goto out_unlock_free;
+		else
+			set_semotime(sma, sops);
	}
+	if (error <= 0)
+		goto out_unlock_free;

	/* We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
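
The hunk above changes user-visible behaviour: a successful semop() that does not alter the array (for instance a wait-for-zero request that is satisfied immediately) now goes through set_semotime(), whereas before only altering operations refreshed sem_otime. The small probe below makes that visible through IPC_STAT; it assumes a Linux kernel with this change applied and relies on Linux initialising a new semaphore's value to zero.

#include <stdio.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

union semun { int val; struct semid_ds *buf; unsigned short *array; };

int main(void)
{
	int semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
	/* sem_op == 0 waits for the semaphore to be zero; a fresh Linux
	 * semaphore starts at zero, so this succeeds immediately and
	 * does not alter the set */
	struct sembuf wait_zero = { .sem_num = 0, .sem_op = 0, .sem_flg = 0 };
	struct semid_ds ds;
	union semun arg = { .buf = &ds };

	if (semid < 0)
		return 1;

	semop(semid, &wait_zero, 1);

	semctl(semid, 0, IPC_STAT, arg);
	printf("sem_otime = %ld\n", (long)ds.sem_otime);

	semctl(semid, 0, IPC_RMID);
	return 0;
}

On kernels without the change above the program prints sem_otime = 0; with it, the immediately satisfied wait-for-zero stamps the current time.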
@@ -2061,6 +2119,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
	struct sem_array *sma = it;
	time_t sem_otime;

+	/*
+	 * The proc interface isn't aware of sem_lock(); it calls
+	 * ipc_lock_object() directly (in sysvipc_find_ipc).
+	 * In order to stay compatible with sem_lock(), we must wait until
+	 * all simple semop() calls have left their critical regions.
+	 */
+	sem_wait_array(sma);
+
	sem_otime = get_semotime(sma);

	return seq_printf(s,
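
For completeness: the function patched here backs /proc/sysvipc/sem, so the practical effect of the added sem_wait_array() call is that readers of that file only see per-semaphore state after in-flight simple semop()s have left their critical sections. A trivial reader is below; the usual column order of the file (key, semid, perms, nsems, uid, gid, cuid, cgid, otime, ctime, with otime being the value that get_semotime() folds together) is stated from general knowledge, not from this diff.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sysvipc/sem", "r");
	char line[256];

	if (!f) {
		perror("/proc/sysvipc/sem");
		return 1;
	}

	/* first line is the header, then one line per semaphore set */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return 0;
}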