[PATCH 11/17] bpf/task_iter: In task_file_seq_get_next use fnext_task

From: Eric W. Biederman
Date: Mon Aug 17 2020 - 18:12:19 EST


When discussing[1] exec and posix file locks it was realized that none
of the callers of get_files_struct fundamentally needed to call
get_files_struct, and that by switching them to helper functions
instead it will both simplify their code and remove unnecessary
increments of files_struct.count. Those unnecessary increments can
result in exec unnecessarily unsharing files_struct which breaking
posix locks, and it can result in fget_light having to fallback to
fget reducing system performance.

Using fnext_task simplifies task_file_seq_get_next, by moving the
checking for the maximum file descritor into the generic code, and by
remvoing the need for capturing and releasing a reference on
files_struct. As the reference count of files_struct no longer needs
to be maintained bpf_iter_seq_task_file_info can have it's files member
removed and task_file_seq_get_next no longer it's fstruct argument.

The curr_fd local variable does need to become unsigned to be used
with fnext_task. As curr_fd is assigned from and assigned a u32
making curr_fd an unsigned int won't cause problems and might prevent
them.

[1] https://lkml.kernel.org/r/20180915160423.GA31461@xxxxxxxxxx
Suggested-by: Oleg Nesterov <oleg@xxxxxxxxxx>
Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
---
kernel/bpf/task_iter.c | 43 ++++++++++--------------------------------
1 file changed, 10 insertions(+), 33 deletions(-)

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 232df29793e9..831d42d7543a 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -122,45 +122,33 @@ struct bpf_iter_seq_task_file_info {
*/
struct bpf_iter_seq_task_common common;
struct task_struct *task;
- struct files_struct *files;
u32 tid;
u32 fd;
};

static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
- struct task_struct **task, struct files_struct **fstruct)
+ struct task_struct **task)
{
struct pid_namespace *ns = info->common.ns;
- u32 curr_tid = info->tid, max_fds;
- struct files_struct *curr_files;
+ u32 curr_tid = info->tid;
struct task_struct *curr_task;
- int curr_fd = info->fd;
+ unsigned int curr_fd = info->fd;

/* If this function returns a non-NULL file object,
- * it held a reference to the task/files_struct/file.
+ * it held a reference to the task/file.
* Otherwise, it does not hold any reference.
*/
again:
if (*task) {
curr_task = *task;
- curr_files = *fstruct;
curr_fd = info->fd;
} else {
curr_task = task_seq_get_next(ns, &curr_tid);
if (!curr_task)
return NULL;

- curr_files = get_files_struct(curr_task);
- if (!curr_files) {
- put_task_struct(curr_task);
- curr_tid = ++(info->tid);
- info->fd = 0;
- goto again;
- }
-
- /* set *fstruct, *task and info->tid */
- *fstruct = curr_files;
+ /* set *task and info->tid */
*task = curr_task;
if (curr_tid == info->tid) {
curr_fd = info->fd;
@@ -171,13 +159,12 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
}

rcu_read_lock();
- max_fds = files_fdtable(curr_files)->max_fds;
- for (; curr_fd < max_fds; curr_fd++) {
+ for (;; curr_fd++) {
struct file *f;

- f = fcheck_files(curr_files, curr_fd);
+ f = fnext_task(curr_task, &curr_fd);
if (!f)
- continue;
+ break;

/* set info->fd */
info->fd = curr_fd;
@@ -188,10 +175,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,

/* the current task is done, go to the next task */
rcu_read_unlock();
- put_files_struct(curr_files);
put_task_struct(curr_task);
*task = NULL;
- *fstruct = NULL;
info->fd = 0;
curr_tid = ++(info->tid);
goto again;
@@ -200,13 +185,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
{
struct bpf_iter_seq_task_file_info *info = seq->private;
- struct files_struct *files = NULL;
struct task_struct *task = NULL;
struct file *file;

- file = task_file_seq_get_next(info, &task, &files);
+ file = task_file_seq_get_next(info, &task);
if (!file) {
- info->files = NULL;
info->task = NULL;
return NULL;
}
@@ -214,7 +197,6 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
if (*pos == 0)
++*pos;
info->task = task;
- info->files = files;

return file;
}
@@ -222,22 +204,19 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct bpf_iter_seq_task_file_info *info = seq->private;
- struct files_struct *files = info->files;
struct task_struct *task = info->task;
struct file *file;

++*pos;
++info->fd;
fput((struct file *)v);
- file = task_file_seq_get_next(info, &task, &files);
+ file = task_file_seq_get_next(info, &task);
if (!file) {
- info->files = NULL;
info->task = NULL;
return NULL;
}

info->task = task;
- info->files = files;

return file;
}
@@ -286,9 +265,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v)
(void)__task_file_seq_show(seq, v, true);
} else {
fput((struct file *)v);
- put_files_struct(info->files);
put_task_struct(info->task);
- info->files = NULL;
info->task = NULL;
}
}
--
2.25.0