[PATCH] epoll: try to be a _bit_ better about file lifetimes

From: Linus Torvalds
Date: Fri May 03 2024 - 17:15:12 EST


epoll is a mess, and does various invalid things in the name of
performance.

Let's try to rein it in a bit. Something like this, perhaps?

Not-yet-signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
---

This is entirely untested, thus the "Not-yet-signed-off-by". But I
think this may be kind of the right path forward.

I suspect the ->poll() call is the main case that matters, but there are
other places where eventpoll just looks up the file pointer without then
being very careful about it. The sock_from_file(epi->ffd.file) uses in
particular should probably also use this to look up the file.

Comments?

fs/eventpoll.c | 30 +++++++++++++++++++++++++++++-
1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 882b89edc52a..bffa8083ff36 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -285,6 +285,30 @@ static inline void free_ephead(struct epitems_head *head)
kmem_cache_free(ephead_cache, head);
}

+/*
+ * Take a reference on the file behind an epitem, if it is still live.
+ *
+ * The ffd.file pointer may be in the process of being torn down due
+ * to being closed, but we may not have finished eventpoll_release() yet.
+ *
+ * Technically, even with the atomic_long_inc_not_zero, the file may
+ * have been freed and then re-allocated to something else (files are
+ * SLAB_TYPESAFE_BY_RCU, not RCU-delayed). For epoll, we don't much care.
+ *
+ * Returns NULL if f_count was already zero; otherwise caller must fput().
+ */
+static struct file *epi_fget(const struct epitem *epi)
+{
+ struct file *file;

+ rcu_read_lock();
+ file = epi->ffd.file;
+ if (!atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
+ rcu_read_unlock();
+ return file;
+}
+
static void list_file(struct file *file)
{
struct epitems_head *head;
@@ -987,14 +1011,18 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt,
int depth)
{
- struct file *file = epi->ffd.file;
+ struct file *file = epi_fget(epi); /* referenced file, or NULL */
__poll_t res;

+ if (!file)
+ return 0; /* no reference obtained: report no events */
+
pt->_key = epi->event.events;
if (!is_file_epoll(file))
res = vfs_poll(file, pt);
else
res = __ep_eventpoll_poll(file, pt, depth);
+ fput(file); /* drop the reference taken by epi_fget() */
return res & epi->event.events;
}

--
2.44.0.330.g4d18c88175