@@ -585,14 +585,14 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
  * @sproc: Pointer to the scan callback.
  * @priv: Private opaque data passed to the @sproc callback.
  * @depth: The current depth of recursive f_op->poll calls.
+ * @ep_locked: caller already holds ep->mtx
  *
  * Returns: The same integer error code returned by the @sproc callback.
  */
 static int ep_scan_ready_list(struct eventpoll *ep,
			      int (*sproc)(struct eventpoll *,
					   struct list_head *, void *),
-			      void *priv,
-			      int depth)
+			      void *priv, int depth, bool ep_locked)
 {
 	int error, pwake = 0;
 	unsigned long flags;
@@ -603,7 +603,9 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	 * We need to lock this because we could be hit by
 	 * eventpoll_release_file() and epoll_ctl().
 	 */
-	mutex_lock_nested(&ep->mtx, depth);
+
+	if (!ep_locked)
+		mutex_lock_nested(&ep->mtx, depth);
 
 	/*
 	 * Steal the ready list, and re-init the original one to the
@@ -667,7 +669,8 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	}
 	spin_unlock_irqrestore(&ep->lock, flags);
 
-	mutex_unlock(&ep->mtx);
+	if (!ep_locked)
+		mutex_unlock(&ep->mtx);
 
 	/* We have to call this outside the lock */
 	if (pwake)
@@ -822,15 +825,34 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
 	return 0;
 }
 
+static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
+				 poll_table *pt);
+
+struct readyevents_arg {
+	struct eventpoll *ep;
+	bool locked;
+};
+
 static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
 {
-	return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
+	struct readyevents_arg *arg = priv;
+
+	return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL,
+				  call_nests + 1, arg->locked);
 }
 
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 {
 	int pollflags;
 	struct eventpoll *ep = file->private_data;
+	struct readyevents_arg arg;
+
+	/*
+	 * During ep_insert() we already hold the ep->mtx for the tfile.
+	 * Prevent re-acquisition.
+	 */
+	arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc);
+	arg.ep = ep;
 
 	/* Insert inside our poll wait queue */
 	poll_wait(file, &ep->poll_wait, wait);
@@ -842,7 +864,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 	 * could re-enter here.
 	 */
 	pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
-				   ep_poll_readyevents_proc, ep, ep, current);
+				   ep_poll_readyevents_proc, &arg, ep, current);
 
 	return pollflags != -1 ? pollflags : 0;
 }
@@ -1243,7 +1265,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi)
  * Must be called with "mtx" held.
  */
 static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-		     struct file *tfile, int fd)
+		     struct file *tfile, int fd, int full_check)
 {
 	int error, revents, pwake = 0;
 	unsigned long flags;
@@ -1309,7 +1331,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 	/* now check if we've created too many backpaths */
 	error = -EINVAL;
-	if (reverse_path_check())
+	if (full_check && reverse_path_check())
 		goto error_remove_epi;
 
 	/* We have to drop the new item inside our item list to keep track of it */
@@ -1532,7 +1554,7 @@ static int ep_send_events(struct eventpoll *ep,
 	esed.maxevents = maxevents;
 	esed.events = events;
 
-	return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
+	return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
 }
 
 static inline struct timespec ep_set_mstimeout(long ms)
@@ -1802,11 +1824,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
		struct epoll_event __user *, event)
 {
 	int error;
-	int did_lock_epmutex = 0;
+	int full_check = 0;
 	struct fd f, tf;
 	struct eventpoll *ep;
 	struct epitem *epi;
 	struct epoll_event epds;
+	struct eventpoll *tep = NULL;
 
 	error = -EFAULT;
 	if (ep_op_has_event(op) &&
@@ -1855,23 +1878,40 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	 * and hang them on the tfile_check_list, so we can check that we
 	 * haven't created too many possible wakeup paths.
 	 *
-	 * We need to hold the epmutex across ep_insert to prevent
-	 * multple adds from creating loops in parallel.
+	 * We do not need to take the global 'epmutex' on EPOLL_CTL_ADD when
+	 * the epoll file descriptor is attaching directly to a wakeup source,
+	 * unless the epoll file descriptor is nested. The purpose of taking the
+	 * 'epmutex' on add is to prevent complex topologies such as loops and
+	 * deep wakeup paths from forming in parallel through multiple
+	 * EPOLL_CTL_ADD operations.
 	 */
+	mutex_lock_nested(&ep->mtx, 0);
 	if (op == EPOLL_CTL_ADD) {
-		mutex_lock(&epmutex);
-		did_lock_epmutex = 1;
-		if (is_file_epoll(tf.file)) {
-			error = -ELOOP;
-			if (ep_loop_check(ep, tf.file) != 0) {
-				clear_tfile_check_list();
-				goto error_tgt_fput;
+		if (!list_empty(&f.file->f_ep_links) ||
+				is_file_epoll(tf.file)) {
+			full_check = 1;
+			mutex_unlock(&ep->mtx);
+			mutex_lock(&epmutex);
+			if (is_file_epoll(tf.file)) {
+				error = -ELOOP;
+				if (ep_loop_check(ep, tf.file) != 0) {
+					clear_tfile_check_list();
+					goto error_tgt_fput;
+				}
+			} else
+				list_add(&tf.file->f_tfile_llink,
+					 &tfile_check_list);
+			mutex_lock_nested(&ep->mtx, 0);
+			if (is_file_epoll(tf.file)) {
+				tep = tf.file->private_data;
+				mutex_lock_nested(&tep->mtx, 1);
 			}
-		} else
-			list_add(&tf.file->f_tfile_llink, &tfile_check_list);
+		}
+	}
+	if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) {
+		tep = tf.file->private_data;
+		mutex_lock_nested(&tep->mtx, 1);
 	}
-
-	mutex_lock_nested(&ep->mtx, 0);
 
 	/*
 	 * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
@@ -1885,10 +1925,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	case EPOLL_CTL_ADD:
		if (!epi) {
			epds.events |= POLLERR | POLLHUP;
-			error = ep_insert(ep, &epds, tf.file, fd);
+			error = ep_insert(ep, &epds, tf.file, fd, full_check);
		} else
			error = -EEXIST;
-		clear_tfile_check_list();
+		if (full_check)
+			clear_tfile_check_list();
		break;
 	case EPOLL_CTL_DEL:
		if (epi)
@@ -1904,10 +1945,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
		error = -ENOENT;
		break;
 	}
+	if (tep != NULL)
+		mutex_unlock(&tep->mtx);
 	mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
-	if (did_lock_epmutex)
+	if (full_check)
		mutex_unlock(&epmutex);
 
 	fdput(tf);
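
The locking comment in the epoll_ctl() hunk distinguishes "simple" attachments from nested ones. For reference, the userspace sketch below (illustrative only, not part of the patch) builds both topologies: adding a plain pipe fd to a non-nested epoll instance, which with this change takes only that instance's ep->mtx, and adding one epoll fd to another, which still goes through the global epmutex and the loop/reverse-path checks.

/*
 * Illustrative userspace sketch (not part of the patch): the two
 * EPOLL_CTL_ADD topologies discussed in the locking comment above.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>

int main(void)
{
	int pipefd[2];
	int ep_outer, ep_inner;
	struct epoll_event ev = { .events = EPOLLIN };

	if (pipe(pipefd) < 0 || (ep_outer = epoll_create1(0)) < 0 ||
	    (ep_inner = epoll_create1(0)) < 0) {
		perror("setup");
		return 1;
	}

	/*
	 * Simple topology: a regular fd added to an epoll instance that
	 * is not itself watched by another epoll set.  With this patch,
	 * only ep_outer's ep->mtx is taken on this path.
	 */
	ev.data.fd = pipefd[0];
	if (epoll_ctl(ep_outer, EPOLL_CTL_ADD, pipefd[0], &ev) < 0)
		perror("add pipe fd");

	/*
	 * Nested topology: one epoll fd added to another.  This path
	 * still takes the global epmutex for loop/path checking.
	 */
	ev.data.fd = ep_inner;
	if (epoll_ctl(ep_outer, EPOLL_CTL_ADD, ep_inner, &ev) < 0)
		perror("add nested epoll fd");

	close(pipefd[0]);
	close(pipefd[1]);
	close(ep_inner);
	close(ep_outer);
	return 0;
}

Either call is a convenient starting point for exercising the EPOLL_CTL_ADD paths touched above.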