Re: [PATCH v4 43/45] namei: initialize parameters passed to step_into()

From: Alexander Potapenko
Date: Mon Jul 04 2022 - 04:21:41 EST


On Mon, Jul 4, 2022 at 4:53 AM Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote:
>
> On Sat, Jul 02, 2022 at 10:23:16AM -0700, Linus Torvalds wrote:
>
> > Al - can you please take a quick look?
>
> FWIW, trying to write a coherent documentation had its usual effect...
> The thing is, we don't really need to fetch the inode that early.
> All we really care about is that in RCU mode ->d_seq gets sampled
> before we fetch ->d_inode *and* we don't treat "it looks negative"
> as hard -ENOENT in case of ->d_seq mismatch.
>
> Which can be bloody well left to step_into(). So we don't need
> to pass it inode argument at all - just dentry and seq. Makes
> a bunch of functions simpler as well...
>
> It does *not* deal with the "uninitialized" seq argument in
> !RCU case; I'll handle that in the followup, but that's a separate
> story, IMO (and very clearly a false positive).

I can confirm that your patch fixes the KMSAN reports on `inode`, yet
reports like the following one (on `seq`) still persist:

=====================================================
BUG: KMSAN: uninit-value in walk_component+0x5e7/0x6c0 fs/namei.c:1996
walk_component+0x5e7/0x6c0 fs/namei.c:1996
lookup_last fs/namei.c:2445
path_lookupat+0x27d/0x6f0 fs/namei.c:2468
filename_lookup+0x24c/0x800 fs/namei.c:2497
kern_path+0x79/0x3a0 fs/namei.c:2587
init_stat+0x72/0x13f fs/init.c:132
clean_path+0x74/0x24c init/initramfs.c:339
do_name+0x12d/0xc17 init/initramfs.c:371
write_buffer init/initramfs.c:457
unpack_to_rootfs+0x49a/0xd9e init/initramfs.c:510
do_populate_rootfs+0x57/0x40f init/initramfs.c:699
async_run_entry_fn+0x8f/0x400 kernel/async.c:127
process_one_work+0xb27/0x13e0 kernel/workqueue.c:2289
worker_thread+0x1076/0x1d60 kernel/workqueue.c:2436
kthread+0x31b/0x430 kernel/kthread.c:376
ret_from_fork+0x1f/0x30 ??:?

Local variable seq created at:
walk_component+0x46/0x6c0 fs/namei.c:1981
lookup_last fs/namei.c:2445
path_lookupat+0x27d/0x6f0 fs/namei.c:2468

CPU: 0 PID: 10 Comm: kworker/u9:0 Tainted: G B
5.19.0-rc4-00059-gcf2d25715943-dirty #103
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Workqueue: events_unbound async_run_entry_fn
=====================================================

What makes you think they are false positives? Is the scenario I
described earlier in this thread:

"""
In particular, if the call to lookup_fast() in walk_component()
returns NULL, and lookup_slow() returns a valid dentry, then the
`seq` and `inode` will remain uninitialized until the call to
step_into()
"""

impossible?

> Cumulative diff follows; splitup is in #work.namei. Comments?
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 1f28d3f463c3..7f4f61ade9e3 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1467,7 +1467,7 @@ EXPORT_SYMBOL(follow_down);
> * we meet a managed dentry that would need blocking.
> */
> static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
> - struct inode **inode, unsigned *seqp)
> + unsigned *seqp)
> {
> struct dentry *dentry = path->dentry;
> unsigned int flags = dentry->d_flags;
> @@ -1497,13 +1497,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
> dentry = path->dentry = mounted->mnt.mnt_root;
> nd->state |= ND_JUMPED;
> *seqp = read_seqcount_begin(&dentry->d_seq);
> - *inode = dentry->d_inode;
> - /*
> - * We don't need to re-check ->d_seq after this
> - * ->d_inode read - there will be an RCU delay
> - * between mount hash removal and ->mnt_root
> - * becoming unpinned.
> - */
> flags = dentry->d_flags;
> continue;
> }
> @@ -1515,8 +1508,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
> }
>
> static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
> - struct path *path, struct inode **inode,
> - unsigned int *seqp)
> + struct path *path, unsigned int *seqp)
> {
> bool jumped;
> int ret;
> @@ -1525,9 +1517,7 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
> path->dentry = dentry;
> if (nd->flags & LOOKUP_RCU) {
> unsigned int seq = *seqp;
> - if (unlikely(!*inode))
> - return -ENOENT;
> - if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
> + if (likely(__follow_mount_rcu(nd, path, seqp)))
> return 0;
> if (!try_to_unlazy_next(nd, dentry, seq))
> return -ECHILD;
> @@ -1547,7 +1537,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
> if (path->mnt != nd->path.mnt)
> mntput(path->mnt);
> } else {
> - *inode = d_backing_inode(path->dentry);
> *seqp = 0; /* out of RCU mode, so the value doesn't matter */
> }
> return ret;
> @@ -1607,9 +1596,7 @@ static struct dentry *__lookup_hash(const struct qstr *name,
> return dentry;
> }
>
> -static struct dentry *lookup_fast(struct nameidata *nd,
> - struct inode **inode,
> - unsigned *seqp)
> +static struct dentry *lookup_fast(struct nameidata *nd, unsigned *seqp)
> {
> struct dentry *dentry, *parent = nd->path.dentry;
> int status = 1;
> @@ -1628,22 +1615,11 @@ static struct dentry *lookup_fast(struct nameidata *nd,
> return NULL;
> }
>
> - /*
> - * This sequence count validates that the inode matches
> - * the dentry name information from lookup.
> - */
> - *inode = d_backing_inode(dentry);
> - if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
> - return ERR_PTR(-ECHILD);
> -
> - /*
> + /*
> * This sequence count validates that the parent had no
> * changes while we did the lookup of the dentry above.
> - *
> - * The memory barrier in read_seqcount_begin of child is
> - * enough, we can use __read_seqcount_retry here.
> */
> - if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
> + if (unlikely(read_seqcount_retry(&parent->d_seq, nd->seq)))
> return ERR_PTR(-ECHILD);
>
> *seqp = seq;
> @@ -1838,13 +1814,21 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
> * for the common case.
> */
> static const char *step_into(struct nameidata *nd, int flags,
> - struct dentry *dentry, struct inode *inode, unsigned seq)
> + struct dentry *dentry, unsigned seq)
> {
> struct path path;
> - int err = handle_mounts(nd, dentry, &path, &inode, &seq);
> + struct inode *inode;
> + int err = handle_mounts(nd, dentry, &path, &seq);
>
> if (err < 0)
> return ERR_PTR(err);
> + inode = path.dentry->d_inode;
> + if (unlikely(!inode)) {
> + if ((nd->flags & LOOKUP_RCU) &&
> + read_seqcount_retry(&path.dentry->d_seq, seq))
> + return ERR_PTR(-ECHILD);
> + return ERR_PTR(-ENOENT);
> + }
> if (likely(!d_is_symlink(path.dentry)) ||
> ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
> (flags & WALK_NOFOLLOW)) {
> @@ -1870,9 +1854,7 @@ static const char *step_into(struct nameidata *nd, int flags,
> return pick_link(nd, &path, inode, seq, flags);
> }
>
> -static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
> - struct inode **inodep,
> - unsigned *seqp)
> +static struct dentry *follow_dotdot_rcu(struct nameidata *nd, unsigned *seqp)
> {
> struct dentry *parent, *old;
>
> @@ -1895,7 +1877,6 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
> }
> old = nd->path.dentry;
> parent = old->d_parent;
> - *inodep = parent->d_inode;
> *seqp = read_seqcount_begin(&parent->d_seq);
> if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
> return ERR_PTR(-ECHILD);
> @@ -1910,9 +1891,7 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
> return NULL;
> }
>
> -static struct dentry *follow_dotdot(struct nameidata *nd,
> - struct inode **inodep,
> - unsigned *seqp)
> +static struct dentry *follow_dotdot(struct nameidata *nd, unsigned *seqp)
> {
> struct dentry *parent;
>
> @@ -1937,7 +1916,6 @@ static struct dentry *follow_dotdot(struct nameidata *nd,
> return ERR_PTR(-ENOENT);
> }
> *seqp = 0;
> - *inodep = parent->d_inode;
> return parent;
>
> in_root:
> @@ -1952,7 +1930,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
> if (type == LAST_DOTDOT) {
> const char *error = NULL;
> struct dentry *parent;
> - struct inode *inode;
> unsigned seq;
>
> if (!nd->root.mnt) {
> @@ -1961,17 +1938,17 @@ static const char *handle_dots(struct nameidata *nd, int type)
> return error;
> }
> if (nd->flags & LOOKUP_RCU)
> - parent = follow_dotdot_rcu(nd, &inode, &seq);
> + parent = follow_dotdot_rcu(nd, &seq);
> else
> - parent = follow_dotdot(nd, &inode, &seq);
> + parent = follow_dotdot(nd, &seq);
> if (IS_ERR(parent))
> return ERR_CAST(parent);
> if (unlikely(!parent))
> error = step_into(nd, WALK_NOFOLLOW,
> - nd->path.dentry, nd->inode, nd->seq);
> + nd->path.dentry, nd->seq);
> else
> error = step_into(nd, WALK_NOFOLLOW,
> - parent, inode, seq);
> + parent, seq);
> if (unlikely(error))
> return error;
>
> @@ -1995,7 +1972,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
> static const char *walk_component(struct nameidata *nd, int flags)
> {
> struct dentry *dentry;
> - struct inode *inode;
> unsigned seq;
> /*
> * "." and ".." are special - ".." especially so because it has
> @@ -2007,7 +1983,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
> put_link(nd);
> return handle_dots(nd, nd->last_type);
> }
> - dentry = lookup_fast(nd, &inode, &seq);
> + dentry = lookup_fast(nd, &seq);
> if (IS_ERR(dentry))
> return ERR_CAST(dentry);
> if (unlikely(!dentry)) {
> @@ -2017,7 +1993,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
> }
> if (!(flags & WALK_MORE) && nd->depth)
> put_link(nd);
> - return step_into(nd, flags, dentry, inode, seq);
> + return step_into(nd, flags, dentry, seq);
> }
>
> /*
> @@ -2473,8 +2449,7 @@ static int handle_lookup_down(struct nameidata *nd)
> {
> if (!(nd->flags & LOOKUP_RCU))
> dget(nd->path.dentry);
> - return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
> - nd->path.dentry, nd->inode, nd->seq));
> + return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry, nd->seq));
> }
>
> /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
> @@ -3394,7 +3369,6 @@ static const char *open_last_lookups(struct nameidata *nd,
> int open_flag = op->open_flag;
> bool got_write = false;
> unsigned seq;
> - struct inode *inode;
> struct dentry *dentry;
> const char *res;
>
> @@ -3410,7 +3384,7 @@ static const char *open_last_lookups(struct nameidata *nd,
> if (nd->last.name[nd->last.len])
> nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
> /* we _can_ be in RCU mode here */
> - dentry = lookup_fast(nd, &inode, &seq);
> + dentry = lookup_fast(nd, &seq);
> if (IS_ERR(dentry))
> return ERR_CAST(dentry);
> if (likely(dentry))
> @@ -3464,7 +3438,7 @@ static const char *open_last_lookups(struct nameidata *nd,
> finish_lookup:
> if (nd->depth)
> put_link(nd);
> - res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
> + res = step_into(nd, WALK_TRAILING, dentry, seq);
> if (unlikely(res))
> nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
> return res;



--
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-Straße, 33
80636 München

Geschäftsführer: Paul Manicle, Liana Sebastian
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg

Diese E-Mail ist vertraulich. Falls Sie diese fälschlicherweise
erhalten haben sollten, leiten Sie diese bitte nicht an jemand anderes
weiter, löschen Sie alle Kopien und Anhänge davon und lassen Sie mich
bitte wissen, dass die E-Mail an die falsche Person gesendet wurde.


This e-mail is confidential. If you received this communication by
mistake, please don't forward it to anyone else, please erase all
copies and attachments, and please let me know that it has gone to the
wrong person.