@@ -70,6 +70,15 @@
  * simultaneous inserts (A into B and B into A) from racing and
  * constructing a cycle without either insert observing that it is
  * going to.
+ * It is necessary to acquire multiple "ep->mtx"es at once in the
+ * case when one epoll fd is added to another. In this case, we
+ * always acquire the locks in the order of nesting (i.e. after
+ * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired
+ * before e2->mtx). Since we disallow cycles of epoll file
+ * descriptors, this ensures that the mutexes are well-ordered. In
+ * order to communicate this nesting to lockdep, when walking a tree
+ * of epoll file descriptors, we use the current recursion depth as
+ * the lockdep subkey.
  * It is possible to drop the "ep->mtx" and to use the global
  * mutex "epmutex" (together with "ep->lock") to have it working,
  * but having "ep->mtx" will make the interface more scalable.
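
[For illustration: the nesting case the new comment describes is created from
userspace as below. This is a minimal sketch using only the standard epoll
API; it is not part of the patch.]

	/* Nest one epoll fd inside another. After the EPOLL_CTL_ADD,
	 * polling e1 recurses into e2, so the kernel must hold e1->mtx
	 * and e2->mtx at the same time, in that order. */
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/epoll.h>

	int main(void)
	{
		int e1 = epoll_create1(0);
		int e2 = epoll_create1(0);
		struct epoll_event ev = { .events = EPOLLIN };

		ev.data.fd = e2;
		if (epoll_ctl(e1, EPOLL_CTL_ADD, e2, &ev) < 0)
			perror("EPOLL_CTL_ADD");

		close(e2);
		close(e1);
		return 0;
	}
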
@@ -464,13 +473,15 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
  * @ep: Pointer to the epoll private data structure.
  * @sproc: Pointer to the scan callback.
  * @priv: Private opaque data passed to the @sproc callback.
+ * @depth: The current depth of recursive f_op->poll calls.
  *
  * Returns: The same integer error code returned by the @sproc callback.
  */
 static int ep_scan_ready_list(struct eventpoll *ep,
 			      int (*sproc)(struct eventpoll *,
 					   struct list_head *, void *),
-			      void *priv)
+			      void *priv,
+			      int depth)
 {
 	int error, pwake = 0;
 	unsigned long flags;
@@ -481,7 +492,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	 * We need to lock this because we could be hit by
 	 * eventpoll_release_file() and epoll_ctl().
 	 */
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, depth);
 
 	/*
 	 * Steal the ready list, and re-init the original one to the
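
[mutex_lock_nested() is the stock lockdep annotation used here: its second
argument is a subclass, and acquisitions with different subclasses are
treated as distinct levels of the same lock class, so holding two ep->mtx
mutexes at once no longer looks like recursive locking to lockdep. A minimal
sketch of the idiom; walk_one_level() and its parent/child arguments are
hypothetical, not real call sites in this file.]

	static void walk_one_level(struct eventpoll *parent, struct eventpoll *child)
	{
		mutex_lock_nested(&parent->mtx, 0);	/* outer epoll fd, depth 0 */
		mutex_lock_nested(&child->mtx, 1);	/* nested epoll fd, depth 1 */
		/* ... scan both ready lists ... */
		mutex_unlock(&child->mtx);
		mutex_unlock(&parent->mtx);
	}
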
@@ -670,7 +681,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
 
 static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
 {
-	return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
+	return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
 }
 
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
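
[ep_call_nested() invokes its callback with call_nests == 0 at the outermost
level, so passing call_nests + 1 here stacks these acquisitions above the
depth-0 lock taken on the epoll_wait() path further down in this patch. A
rough call-flow sketch, assuming e2 was added to e1 and e3 to e2;
reconstructed for illustration, not quoted from the source:]

	/*
	 * epoll_wait(e1)
	 *   ep_scan_ready_list(e1, ..., depth = 0)        e1->mtx, subclass 0
	 *     f_op->poll(e2): ep_call_nested(...)          call_nests = 0
	 *       ep_scan_ready_list(e2, ..., depth = 1)     e2->mtx, subclass 1
	 *         f_op->poll(e3): ep_call_nested(...)      call_nests = 1
	 *           ep_scan_ready_list(e3, ..., depth = 2) e3->mtx, subclass 2
	 */
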
@@ -737,7 +748,7 @@ void eventpoll_release_file(struct file *file)
 
 		ep = epi->ep;
 		list_del_init(&epi->fllink);
-		mutex_lock(&ep->mtx);
+		mutex_lock_nested(&ep->mtx, 0);
 		ep_remove(ep, epi);
 		mutex_unlock(&ep->mtx);
 	}
@@ -1134,7 +1145,7 @@ static int ep_send_events(struct eventpoll *ep,
 	esed.maxevents = maxevents;
 	esed.events = events;
 
-	return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
+	return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
 }
 
 static inline struct timespec ep_set_mstimeout(long ms)
@@ -1267,7 +1278,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
 	struct rb_node *rbp;
 	struct epitem *epi;
 
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, call_nests + 1);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
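
[The well-ordering argument depends on the cycle check this function
implements: closing a loop of epoll fds is refused at EPOLL_CTL_ADD time
with -ELOOP. An illustrative userspace check, again not part of the patch:]

	#include <assert.h>
	#include <errno.h>
	#include <sys/epoll.h>

	int main(void)
	{
		int e1 = epoll_create1(0);
		int e2 = epoll_create1(0);
		struct epoll_event ev = { .events = EPOLLIN };

		assert(epoll_ctl(e1, EPOLL_CTL_ADD, e2, &ev) == 0);
		/* Adding e1 back into e2 would create a cycle; the kernel
		 * rejects it, keeping the ep->mtx hierarchy well-ordered. */
		assert(epoll_ctl(e2, EPOLL_CTL_ADD, e1, &ev) == -1 &&
		       errno == ELOOP);
		return 0;
	}
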
@@ -1409,7 +1420,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	}
 
 
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, 0);
 
 	/*
 	 * Try to lookup the file inside our RB tree, Since we grabbed "mtx"