|
@@ -33,6 +33,7 @@
|
|
|
#include <linux/bitops.h>
|
|
|
#include <linux/mutex.h>
|
|
|
#include <linux/anon_inodes.h>
|
|
|
+#include <linux/device.h>
|
|
|
#include <asm/uaccess.h>
|
|
|
#include <asm/io.h>
|
|
|
#include <asm/mman.h>
|
|
@@ -87,7 +88,7 @@
|
|
|
*/
|
|
|
|
|
|
/* Epoll private bits inside the event mask */
|
|
|
-#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
|
|
|
+#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET)
|
|
|
|
|
|
/* Maximum number of nesting allowed inside epoll sets */
|
|
|
#define EP_MAX_NESTS 4
|
|
@@ -154,6 +155,9 @@ struct epitem {
|
|
|
/* List header used to link this item to the "struct file" items list */
|
|
|
struct list_head fllink;
|
|
|
|
|
|
+ /* wakeup_source used when EPOLLWAKEUP is set */
|
|
|
+ struct wakeup_source *ws;
|
|
|
+
|
|
|
/* The structure that describe the interested events and the source fd */
|
|
|
struct epoll_event event;
|
|
|
};
|
|
@@ -194,6 +198,9 @@ struct eventpoll {
|
|
|
*/
|
|
|
struct epitem *ovflist;
|
|
|
|
|
|
+ /* wakeup_source used when ep_scan_ready_list is running */
|
|
|
+ struct wakeup_source *ws;
|
|
|
+
|
|
|
/* The user that created the eventpoll descriptor */
|
|
|
struct user_struct *user;
|
|
|
|
|
@@ -588,8 +595,10 @@ static int ep_scan_ready_list(struct eventpoll *ep,
|
|
|
* queued into ->ovflist but the "txlist" might already
|
|
|
* contain them, and the list_splice() below takes care of them.
|
|
|
*/
|
|
|
- if (!ep_is_linked(&epi->rdllink))
|
|
|
+ if (!ep_is_linked(&epi->rdllink)) {
|
|
|
list_add_tail(&epi->rdllink, &ep->rdllist);
|
|
|
+ __pm_stay_awake(epi->ws);
|
|
|
+ }
|
|
|
}
|
|
|
/*
|
|
|
* We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
|
|
@@ -602,6 +611,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
|
|
|
* Quickly re-inject items left on "txlist".
|
|
|
*/
|
|
|
list_splice(&txlist, &ep->rdllist);
|
|
|
+ __pm_relax(ep->ws);
|
|
|
|
|
|
if (!list_empty(&ep->rdllist)) {
|
|
|
/*
|
|
@@ -656,6 +666,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
|
|
|
list_del_init(&epi->rdllink);
|
|
|
spin_unlock_irqrestore(&ep->lock, flags);
|
|
|
|
|
|
+ wakeup_source_unregister(epi->ws);
|
|
|
+
|
|
|
/* At this point it is safe to free the eventpoll item */
|
|
|
kmem_cache_free(epi_cache, epi);
|
|
|
|
|
@@ -706,6 +718,7 @@ static void ep_free(struct eventpoll *ep)
|
|
|
mutex_unlock(&epmutex);
|
|
|
mutex_destroy(&ep->mtx);
|
|
|
free_uid(ep->user);
|
|
|
+ wakeup_source_unregister(ep->ws);
|
|
|
kfree(ep);
|
|
|
}
|
|
|
|
|
@@ -737,6 +750,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
|
|
|
* callback, but it's not actually ready, as far as
|
|
|
* caller requested events goes. We can remove it here.
|
|
|
*/
|
|
|
+ __pm_relax(epi->ws);
|
|
|
list_del_init(&epi->rdllink);
|
|
|
}
|
|
|
}
|
|
@@ -927,13 +941,23 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
|
|
|
if (epi->next == EP_UNACTIVE_PTR) {
|
|
|
epi->next = ep->ovflist;
|
|
|
ep->ovflist = epi;
|
|
|
+ if (epi->ws) {
|
|
|
+ /*
|
|
|
+ * Activate ep->ws since epi->ws may get
|
|
|
+ * deactivated at any time.
|
|
|
+ */
|
|
|
+ __pm_stay_awake(ep->ws);
|
|
|
+ }
|
|
|
+
|
|
|
}
|
|
|
goto out_unlock;
|
|
|
}
|
|
|
|
|
|
/* If this file is already in the ready list we exit soon */
|
|
|
- if (!ep_is_linked(&epi->rdllink))
|
|
|
+ if (!ep_is_linked(&epi->rdllink)) {
|
|
|
list_add_tail(&epi->rdllink, &ep->rdllist);
|
|
|
+ __pm_stay_awake(epi->ws);
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
|
|
@@ -1091,6 +1115,30 @@ static int reverse_path_check(void)
|
|
|
return error;
|
|
|
}
|
|
|
|
|
|
+/*
+ * Register the wakeup sources needed to honour EPOLLWAKEUP for @epi.
+ *
+ * The eventpoll-wide source (ep->ws, named "eventpoll") is registered on
+ * first use and then shared by every item of the same eventpoll.  The
+ * per-item source (epi->ws) is named after the dentry of the watched file.
+ *
+ * Returns 0 on success, or -ENOMEM when wakeup_source_register() returns
+ * NULL.  If only the per-item registration fails, ep->ws stays registered
+ * and epi->ws is left NULL.
+ */
+static int ep_create_wakeup_source(struct epitem *epi)
+{
+	struct eventpoll *ep = epi->ep;
+	const char *ws_name;
+
+	/* Lazily create the source shared by the whole eventpoll. */
+	if (!ep->ws) {
+		ep->ws = wakeup_source_register("eventpoll");
+		if (!ep->ws)
+			return -ENOMEM;
+	}
+
+	/* The per-item source carries the name of the watched file. */
+	ws_name = epi->ffd.file->f_path.dentry->d_name.name;
+	epi->ws = wakeup_source_register(ws_name);
+
+	return epi->ws ? 0 : -ENOMEM;
+}
|
|
|
+
|
|
|
+/*
+ * Tear down the per-item wakeup source of @epi and leave epi->ws NULL.
+ *
+ * The pointer is detached under ep->lock *before* the source is
+ * unregistered.  Unregistering first would leave epi->ws pointing at a
+ * freed wakeup_source for a short window, during which a concurrent
+ * ep_poll_callback() could pass it to __pm_stay_awake().
+ * NOTE(review): this relies on ep_poll_callback() holding ep->lock while it
+ * touches epi->ws - confirm against the full file.
+ *
+ * Must be called without ep->lock held (the lock is taken here).
+ */
+static void ep_destroy_wakeup_source(struct epitem *epi)
+{
+	struct wakeup_source *ws;
+	unsigned long flags;
+
+	spin_lock_irqsave(&epi->ep->lock, flags);
+	ws = epi->ws;
+	epi->ws = NULL;
+	spin_unlock_irqrestore(&epi->ep->lock, flags);
+
+	/* No reader can reach the source any more; safe to release it. */
+	wakeup_source_unregister(ws);
+}
|
|
|
+
|
|
|
/*
|
|
|
* Must be called with "mtx" held.
|
|
|
*/
|
|
@@ -1118,6 +1166,13 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
|
|
|
epi->event = *event;
|
|
|
epi->nwait = 0;
|
|
|
epi->next = EP_UNACTIVE_PTR;
|
|
|
+ if (epi->event.events & EPOLLWAKEUP) {
|
|
|
+ error = ep_create_wakeup_source(epi);
|
|
|
+ if (error)
|
|
|
+ goto error_create_wakeup_source;
|
|
|
+ } else {
|
|
|
+ epi->ws = NULL;
|
|
|
+ }
|
|
|
|
|
|
/* Initialize the poll table using the queue callback */
|
|
|
epq.epi = epi;
|
|
@@ -1164,6 +1219,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
|
|
|
/* If the file is already "ready" we drop it inside the ready list */
|
|
|
if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
|
|
|
list_add_tail(&epi->rdllink, &ep->rdllist);
|
|
|
+ __pm_stay_awake(epi->ws);
|
|
|
|
|
|
/* Notify waiting tasks that events are available */
|
|
|
if (waitqueue_active(&ep->wq))
|
|
@@ -1204,6 +1260,9 @@ error_unregister:
|
|
|
list_del_init(&epi->rdllink);
|
|
|
spin_unlock_irqrestore(&ep->lock, flags);
|
|
|
|
|
|
+ wakeup_source_unregister(epi->ws);
|
|
|
+
|
|
|
+error_create_wakeup_source:
|
|
|
kmem_cache_free(epi_cache, epi);
|
|
|
|
|
|
return error;
|
|
@@ -1229,6 +1288,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
|
|
|
epi->event.events = event->events;
|
|
|
pt._key = event->events;
|
|
|
epi->event.data = event->data; /* protected by mtx */
|
|
|
+ if (epi->event.events & EPOLLWAKEUP) {
|
|
|
+ if (!epi->ws)
|
|
|
+ ep_create_wakeup_source(epi);
|
|
|
+ } else if (epi->ws) {
|
|
|
+ ep_destroy_wakeup_source(epi);
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
* Get current event bits. We can safely use the file* here because
|
|
@@ -1244,6 +1309,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
|
|
|
spin_lock_irq(&ep->lock);
|
|
|
if (!ep_is_linked(&epi->rdllink)) {
|
|
|
list_add_tail(&epi->rdllink, &ep->rdllist);
|
|
|
+ __pm_stay_awake(epi->ws);
|
|
|
|
|
|
/* Notify waiting tasks that events are available */
|
|
|
if (waitqueue_active(&ep->wq))
|
|
@@ -1282,6 +1348,18 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
|
|
|
!list_empty(head) && eventcnt < esed->maxevents;) {
|
|
|
epi = list_first_entry(head, struct epitem, rdllink);
|
|
|
|
|
|
+ /*
|
|
|
+ * Activate ep->ws before deactivating epi->ws to prevent
|
|
|
+ * triggering auto-suspend here (in case we reactivate epi->ws
|
|
|
+ * below).
|
|
|
+ *
|
|
|
+ * This could be rearranged to delay the deactivation of epi->ws
|
|
|
+ * instead, but then epi->ws would temporarily be out of sync
|
|
|
+ * with ep_is_linked().
|
|
|
+ */
|
|
|
+ if (epi->ws && epi->ws->active)
|
|
|
+ __pm_stay_awake(ep->ws);
|
|
|
+ __pm_relax(epi->ws);
|
|
|
list_del_init(&epi->rdllink);
|
|
|
|
|
|
pt._key = epi->event.events;
|
|
@@ -1298,6 +1376,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
|
|
|
if (__put_user(revents, &uevent->events) ||
|
|
|
__put_user(epi->event.data, &uevent->data)) {
|
|
|
list_add(&epi->rdllink, head);
|
|
|
+ __pm_stay_awake(epi->ws);
|
|
|
return eventcnt ? eventcnt : -EFAULT;
|
|
|
}
|
|
|
eventcnt++;
|
|
@@ -1317,6 +1396,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
|
|
|
* poll callback will queue them in ep->ovflist.
|
|
|
*/
|
|
|
list_add_tail(&epi->rdllink, &ep->rdllist);
|
|
|
+ __pm_stay_awake(epi->ws);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -1629,6 +1709,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
|
|
|
if (!tfile->f_op || !tfile->f_op->poll)
|
|
|
goto error_tgt_fput;
|
|
|
|
|
|
+ /* Check if EPOLLWAKEUP is allowed */
|
|
|
+ if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP))
|
|
|
+ goto error_tgt_fput;
|
|
|
+
|
|
|
/*
|
|
|
* We have to check that the file structure underneath the file descriptor
|
|
|
* the user passed to us _is_ an eventpoll file. And also we do not permit
|