Re: [PATCH] eventfs: Stop using dcache_readdir() for getdents()

From: Linus Torvalds
Date: Wed Jan 03 2024 - 16:55:06 EST


On Wed, 3 Jan 2024 at 11:57, Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> Or, you know, you could do what I've told you to do at least TEN TIMES
> already, which is to not mess with any of this, and just implement the
> '->permission()' callback (and getattr() to just make 'ls' look sane
> too, rather than silently saying "we'll act as if gid is set right,
> but not show it").

Actually, an even simpler option might be to just do this all at
d_revalidate() time.

Here's an updated patch that builds, and is PURELY AN EXAMPLE. I think
it "works", but it currently always resets the inode mode/uid/gid
unconditionally, which is wrong - it should not do so if the inode has
been manually set.

So take this as a "this might work", but it probably needs a bit more
work - eventfs_set_attr() should set some bit in the inode to say
"these have been set manually", and then revalidate would say "I'll
not touch inodes that have that bit set".

Or something.

Anyway, this patch is nwo relative to your latest pull request, so it
has the check for dentry->d_inode in set_gid() (and still removes the
whole function).

Again: UNTESTED, and meant as a "this is another way to avoid messing
with the dentry tree manually, and just using the VFS interfaces we
already have"

Linus
fs/tracefs/inode.c | 147 ++++++++++-------------------------------------------
1 file changed, 26 insertions(+), 121 deletions(-)

diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index bc86ffdb103b..5bc9e1a23a31 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -183,87 +183,6 @@ struct tracefs_fs_info {
struct tracefs_mount_opts mount_opts;
};

-static void change_gid(struct dentry *dentry, kgid_t gid)
-{
- if (!dentry->d_inode)
- return;
- dentry->d_inode->i_gid = gid;
-}
-
-/*
- * Taken from d_walk, but without he need for handling renames.
- * Nothing can be renamed while walking the list, as tracefs
- * does not support renames. This is only called when mounting
- * or remounting the file system, to set all the files to
- * the given gid.
- */
-static void set_gid(struct dentry *parent, kgid_t gid)
-{
- struct dentry *this_parent;
- struct list_head *next;
-
- this_parent = parent;
- spin_lock(&this_parent->d_lock);
-
- change_gid(this_parent, gid);
-repeat:
- next = this_parent->d_subdirs.next;
-resume:
- while (next != &this_parent->d_subdirs) {
- struct tracefs_inode *ti;
- struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
- next = tmp->next;
-
- /* Note, getdents() can add a cursor dentry with no inode */
- if (!dentry->d_inode)
- continue;
-
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-
- change_gid(dentry, gid);
-
- /* If this is the events directory, update that too */
- ti = get_tracefs(dentry->d_inode);
- if (ti && (ti->flags & TRACEFS_EVENT_INODE))
- eventfs_update_gid(dentry, gid);
-
- if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&this_parent->d_lock);
- spin_release(&dentry->d_lock.dep_map, _RET_IP_);
- this_parent = dentry;
- spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
- goto repeat;
- }
- spin_unlock(&dentry->d_lock);
- }
- /*
- * All done at this level ... ascend and resume the search.
- */
- rcu_read_lock();
-ascend:
- if (this_parent != parent) {
- struct dentry *child = this_parent;
- this_parent = child->d_parent;
-
- spin_unlock(&child->d_lock);
- spin_lock(&this_parent->d_lock);
-
- /* go into the first sibling still alive */
- do {
- next = child->d_child.next;
- if (next == &this_parent->d_subdirs)
- goto ascend;
- child = list_entry(next, struct dentry, d_child);
- } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
- rcu_read_unlock();
- goto resume;
- }
- rcu_read_unlock();
- spin_unlock(&this_parent->d_lock);
- return;
-}
-
static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
{
substring_t args[MAX_OPT_ARGS];
@@ -315,49 +234,12 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
return 0;
}

-static int tracefs_apply_options(struct super_block *sb, bool remount)
-{
- struct tracefs_fs_info *fsi = sb->s_fs_info;
- struct inode *inode = d_inode(sb->s_root);
- struct tracefs_mount_opts *opts = &fsi->mount_opts;
- umode_t tmp_mode;
-
- /*
- * On remount, only reset mode/uid/gid if they were provided as mount
- * options.
- */
-
- if (!remount || opts->opts & BIT(Opt_mode)) {
- tmp_mode = READ_ONCE(inode->i_mode) & ~S_IALLUGO;
- tmp_mode |= opts->mode;
- WRITE_ONCE(inode->i_mode, tmp_mode);
- }
-
- if (!remount || opts->opts & BIT(Opt_uid))
- inode->i_uid = opts->uid;
-
- if (!remount || opts->opts & BIT(Opt_gid)) {
- /* Set all the group ids to the mount option */
- set_gid(sb->s_root, opts->gid);
- }
-
- return 0;
-}
-
static int tracefs_remount(struct super_block *sb, int *flags, char *data)
{
- int err;
struct tracefs_fs_info *fsi = sb->s_fs_info;

sync_filesystem(sb);
- err = tracefs_parse_options(data, &fsi->mount_opts);
- if (err)
- goto fail;
-
- tracefs_apply_options(sb, true);
-
-fail:
- return err;
+ return tracefs_parse_options(data, &fsi->mount_opts);
}

static int tracefs_show_options(struct seq_file *m, struct dentry *root)
@@ -399,8 +281,33 @@ static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
iput(inode);
}

+static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct tracefs_fs_info *fsi = dentry->d_sb->s_fs_info;
+ struct tracefs_mount_opts *opts = &fsi->mount_opts;
+ struct inode *inode;
+
+ rcu_read_lock();
+ inode = d_inode_rcu(dentry);
+ if (inode) {
+ if (opts->opts & BIT(Opt_mode)) {
+ umode_t tmp_mode;
+ tmp_mode = READ_ONCE(inode->i_mode) & ~S_IALLUGO;
+ tmp_mode |= opts->mode;
+ WRITE_ONCE(inode->i_mode, tmp_mode);
+ }
+ if (opts->opts & BIT(Opt_uid))
+ inode->i_uid = opts->uid;
+ if (opts->opts & BIT(Opt_gid))
+ inode->i_gid = opts->gid;
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
static const struct dentry_operations tracefs_dentry_operations = {
.d_iput = tracefs_dentry_iput,
+ .d_revalidate = tracefs_d_revalidate,
};

static int trace_fill_super(struct super_block *sb, void *data, int silent)
@@ -427,8 +334,6 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &tracefs_super_operations;
sb->s_d_op = &tracefs_dentry_operations;

- tracefs_apply_options(sb, false);
-
return 0;

fail: