@@ -41,6 +41,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/compat.h>
+#include <linux/rculist.h>
 
 /*
  * LOCKING:
@@ -133,8 +134,12 @@ struct nested_calls {
  * of these on a server and we do not want this to take another cache line.
  */
 struct epitem {
-	/* RB tree node used to link this structure to the eventpoll RB tree */
-	struct rb_node rbn;
+	union {
+		/* RB tree node links this structure to the eventpoll RB tree */
+		struct rb_node rbn;
+		/* Used to free the struct epitem */
+		struct rcu_head rcu;
+	};
 
 	/* List header used to link this structure to the eventpoll ready list */
 	struct list_head rdllink;
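The union costs no space because the two members are never live at the same time: rbn is in use only while the epitem is linked into the eventpoll RB tree, and rcu is needed only after rb_erase(), on the way to the free. A standalone sketch of the saving, in plain C with stand-in types rather than the kernel's definitions:

#include <stdio.h>

struct rb_node_sketch { unsigned long parent_color; void *left, *right; };
struct rcu_head_sketch { void *next; void (*func)(void *); };

struct with_union {
	union {
		struct rb_node_sketch rbn;	/* live while on the RB tree */
		struct rcu_head_sketch rcu;	/* live only after rb_erase() */
	};
};

struct without_union {
	struct rb_node_sketch rbn;
	struct rcu_head_sketch rcu;
};

int main(void)
{
	printf("union: %zu bytes, separate fields: %zu bytes\n",
	       sizeof(struct with_union), sizeof(struct without_union));
	return 0;
}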
@@ -671,6 +676,12 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	return error;
 }
 
+static void epi_rcu_free(struct rcu_head *head)
+{
+	struct epitem *epi = container_of(head, struct epitem, rcu);
+	kmem_cache_free(epi_cache, epi);
+}
+
 /*
  * Removes a "struct epitem" from the eventpoll RB tree and deallocates
  * all the associated resources. Must be called with "mtx" held.
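epi_rcu_free() is the standard embedded-callback idiom: the RCU core invokes the callback with a pointer to the rcu_head member, and container_of() recovers the enclosing epitem from it. A minimal standalone illustration of that arithmetic, again with hypothetical sketch types:

#include <assert.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head_sketch { void *next; void (*func)(void *); };

struct epitem_sketch {
	int data;
	struct rcu_head_sketch rcu;	/* embedded, as in struct epitem */
};

int main(void)
{
	struct epitem_sketch item;
	struct rcu_head_sketch *head = &item.rcu;

	/* Recover the enclosing object from the member pointer, exactly
	 * what epi_rcu_free() does when the RCU core invokes it. */
	assert(container_of(head, struct epitem_sketch, rcu) == &item);
	return 0;
}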
@@ -692,8 +703,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 
 	/* Remove the current item from the list of epoll hooks */
 	spin_lock(&file->f_lock);
-	if (ep_is_linked(&epi->fllink))
-		list_del_init(&epi->fllink);
+	list_del_rcu(&epi->fllink);
 	spin_unlock(&file->f_lock);
 
 	rb_erase(&epi->rbn, &ep->rbr);
@@ -704,9 +714,14 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 	spin_unlock_irqrestore(&ep->lock, flags);
 
 	wakeup_source_unregister(ep_wakeup_source(epi));
-
-	/* At this point it is safe to free the eventpoll item */
-	kmem_cache_free(epi_cache, epi);
+	/*
+	 * At this point it is safe to free the eventpoll item. Use the union
+	 * field epi->rcu, since we are trying to minimize the size of
+	 * 'struct epitem'. The 'rbn' field is no longer in use. Protected by
+	 * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make
+	 * use of the rbn field.
+	 */
+	call_rcu(&epi->rcu, epi_rcu_free);
 
 	atomic_long_dec(&ep->user->epoll_watches);
 
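ep_remove() now splits teardown in two: unlink under the locks, then defer the actual free with call_rcu() until every reader that might still hold a reference has left its read-side critical section. The same shape can be reproduced in userspace with liburcu; the names below are illustrative, and the build line assumes liburcu is installed (gcc defer.c -lurcu):

#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>		/* call_rcu(), rcu_barrier(), caa_container_of() */

struct item {
	int key;
	struct rcu_head rcu;	/* storage reused once the item is unlinked */
};

static void item_rcu_free(struct rcu_head *head)
{
	struct item *it = caa_container_of(head, struct item, rcu);

	printf("grace period elapsed, freeing key %d\n", it->key);
	free(it);
}

int main(void)
{
	struct item *it = malloc(sizeof(*it));

	rcu_register_thread();
	it->key = 7;
	/* ... unlink 'it' from every reader-visible structure here ... */
	call_rcu(&it->rcu, item_rcu_free);
	rcu_barrier();		/* drain pending callbacks before exiting */
	rcu_unregister_thread();
	return 0;
}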
@@ -872,7 +887,6 @@ static const struct file_operations eventpoll_fops = {
  */
 void eventpoll_release_file(struct file *file)
 {
-	struct list_head *lsthead = &file->f_ep_links;
 	struct eventpoll *ep;
 	struct epitem *epi;
 
@@ -890,17 +904,12 @@ void eventpoll_release_file(struct file *file)
 	 * Besides, ep_remove() acquires the lock, so we can't hold it here.
 	 */
 	mutex_lock(&epmutex);
-
-	while (!list_empty(lsthead)) {
-		epi = list_first_entry(lsthead, struct epitem, fllink);
-
+	list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
 		ep = epi->ep;
-		list_del_init(&epi->fllink);
 		mutex_lock_nested(&ep->mtx, 0);
 		ep_remove(ep, epi);
 		mutex_unlock(&ep->mtx);
 	}
-
 	mutex_unlock(&epmutex);
 }
 
@@ -1138,7 +1147,9 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
 	struct file *child_file;
 	struct epitem *epi;
 
-	list_for_each_entry(epi, &file->f_ep_links, fllink) {
+	/* CTL_DEL can remove links here, but that can't increase our count */
+	rcu_read_lock();
+	list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
 		child_file = epi->ep->file;
 		if (is_file_epoll(child_file)) {
 			if (list_empty(&child_file->f_ep_links)) {
@@ -1160,6 +1171,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
 				       "file is not an ep!\n");
 		}
 	}
+	rcu_read_unlock();
 	return error;
 }
 
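The read side pays only rcu_read_lock()/rcu_read_unlock(), and the new comment states the invariant this relies on: a concurrent EPOLL_CTL_DEL can hide entries from the walk but can never add to it, so the path count cannot be inflated. A userspace sketch of the same traversal using liburcu (illustrative names; gcc reader.c -lurcu):

#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>		/* rcu_read_lock()/rcu_read_unlock() */
#include <urcu/list.h>
#include <urcu/rculist.h>	/* cds_list_*_rcu */

struct watch {
	int id;
	struct cds_list_head fllink;
};

static CDS_LIST_HEAD(f_ep_links);	/* stand-in for file->f_ep_links */

int main(void)
{
	struct watch *w;
	int i, count = 0;

	rcu_register_thread();

	for (i = 0; i < 3; i++) {
		w = malloc(sizeof(*w));
		w->id = i;
		cds_list_add_rcu(&w->fllink, &f_ep_links);
	}

	/* Concurrent deletions could only hide entries from this walk,
	 * never add them, so a count taken here cannot be inflated. */
	rcu_read_lock();
	cds_list_for_each_entry_rcu(w, &f_ep_links, fllink)
		count++;
	rcu_read_unlock();

	printf("saw %d watches\n", count);
	rcu_unregister_thread();
	return 0;
}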
@@ -1286,7 +1298,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 	/* Add the current item to the list of active epoll hook for this file */
 	spin_lock(&tfile->f_lock);
-	list_add_tail(&epi->fllink, &tfile->f_ep_links);
+	list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
 	spin_unlock(&tfile->f_lock);
 
 	/*
@@ -1327,8 +1339,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 error_remove_epi:
 	spin_lock(&tfile->f_lock);
-	if (ep_is_linked(&epi->fllink))
-		list_del_init(&epi->fllink);
+	list_del_rcu(&epi->fllink);
 	spin_unlock(&tfile->f_lock);
 
 	rb_erase(&epi->rbn, &ep->rbr);
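Note the asymmetry: additions and removals are still serialized by tfile->f_lock; list_add_tail_rcu() and list_del_rcu() only make those updates safe to race against lockless readers. A liburcu sketch of that writer discipline, assuming a pthread mutex in place of the kernel spinlock and head insertion via cds_list_add_rcu() (gcc writer.c -lurcu -lpthread):

#include <pthread.h>
#include <stdlib.h>
#include <urcu.h>		/* synchronize_rcu() */
#include <urcu/list.h>
#include <urcu/rculist.h>

struct hook {
	int id;
	struct cds_list_head fllink;
};

static CDS_LIST_HEAD(f_ep_links);
static pthread_mutex_t f_lock = PTHREAD_MUTEX_INITIALIZER;

static void hook_add(struct hook *h)
{
	pthread_mutex_lock(&f_lock);	/* writers still serialize */
	cds_list_add_rcu(&h->fllink, &f_ep_links);
	pthread_mutex_unlock(&f_lock);
}

static void hook_del(struct hook *h)
{
	pthread_mutex_lock(&f_lock);
	cds_list_del_rcu(&h->fllink);	/* readers may still see it */
	pthread_mutex_unlock(&f_lock);
	synchronize_rcu();		/* wait out current readers */
	free(h);
}

int main(void)
{
	struct hook *h = malloc(sizeof(*h));

	rcu_register_thread();
	h->id = 1;
	hook_add(h);
	hook_del(h);
	rcu_unregister_thread();
	return 0;
}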
@@ -1844,15 +1855,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	 * and hang them on the tfile_check_list, so we can check that we
 	 * haven't created too many possible wakeup paths.
 	 *
-	 * We need to hold the epmutex across both ep_insert and ep_remove
-	 * b/c we want to make sure we are looking at a coherent view of
-	 * epoll network.
+	 * We need to hold the epmutex across ep_insert to prevent
+	 * multiple adds from creating loops in parallel.
 	 */
-	if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
+	if (op == EPOLL_CTL_ADD) {
 		mutex_lock(&epmutex);
 		did_lock_epmutex = 1;
-	}
-	if (op == EPOLL_CTL_ADD) {
 		if (is_file_epoll(tf.file)) {
 			error = -ELOOP;
 			if (ep_loop_check(ep, tf.file) != 0) {