RE: [PATCH RFC 1/3] fs: introduce helper d_path_fast()

From: Justin He
Date: Mon May 10 2021 - 11:26:22 EST


Hi Linus
I don't know how to display the attachment in this mail.
So I copied here below, together with one comment:
> -----Original Message-----
> From: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Sent: Sunday, May 9, 2021 4:40 AM
> To: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
> Cc: Justin He <Justin.He@xxxxxxx>; Petr Mladek <pmladek@xxxxxxxx>; Steven
> Rostedt <rostedt@xxxxxxxxxxx>; Sergey Senozhatsky
> <senozhatsky@xxxxxxxxxxxx>; Andy Shevchenko
> <andriy.shevchenko@xxxxxxxxxxxxxxx>; Rasmus Villemoes
> <linux@xxxxxxxxxxxxxxxxxx>; Jonathan Corbet <corbet@xxxxxxx>; Al Viro
> <viro@xxxxxxxxxxxxxxxx>; Heiko Carstens <hca@xxxxxxxxxxxxx>; Vasily Gorbik
> <gor@xxxxxxxxxxxxx>; Christian Borntraeger <borntraeger@xxxxxxxxxx>; Eric
> W . Biederman <ebiederm@xxxxxxxxxxxx>; Darrick J. Wong
> <darrick.wong@xxxxxxxxxx>; Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>;
> Ira Weiny <ira.weiny@xxxxxxxxx>; Eric Biggers <ebiggers@xxxxxxxxxx>; Ahmed
> S. Darwish <a.darwish@xxxxxxxxxxxxx>; open list:DOCUMENTATION <linux-
> doc@xxxxxxxxxxxxxxx>; Linux Kernel Mailing List <linux-
> kernel@xxxxxxxxxxxxxxx>; linux-s390 <linux-s390@xxxxxxxxxxxxxxx>; linux-
> fsdevel <linux-fsdevel@xxxxxxxxxxxxxxx>
> Subject: Re: [PATCH RFC 1/3] fs: introduce helper d_path_fast()
>
> On Sat, May 8, 2021 at 12:13 PM Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote:
> >
> > Potentiallty delicate question is how to pass bptr/blen to the caller
> > of that helper...
>
> So I was thinking something like this patch..
>
> THIS IS TOTALLY UNTESTED!
>
> I've basically introduced a "struct prepend_buffer" that acts as that
> "end,len" pair and that gets passed around. It builds cleanly for me,
> which actually makes me fairly happy that it might even be close
> right, because the calling conventions would catch any mistake.
>
> It looks superficially sane to me, but again - untested.
>
> The patch ended up being bigger than I expected, but it's all fairly
> straightforward - just munging the calling conventions.
>
> Oh, and I did extract out the core of "prepend_path()" into
> "prepend_entries()" just to show how it would work.
>
> Linus
> fs/d_path.c | 227 ++++++++++++++++++++++++++++++++----------------------------
> 1 file changed, 123 insertions(+), 104 deletions(-)
>
>diff --git a/fs/d_path.c b/fs/d_path.c
>index 270d62133996..47eb29524271 100644
>--- a/fs/d_path.c
>+++ b/fs/d_path.c
>@@ -8,13 +8,18 @@
> #include <linux/prefetch.h>
> #include "mount.h"
>
>-static int prepend(char **buffer, int *buflen, const char *str, int namelen)
>+struct prepend_buffer {
>+ char *ptr;
>+ int len;
>+};
>+
>+static int prepend(struct prepend_buffer *b, const char *str, int namelen)
> {
>- *buflen -= namelen;
>- if (*buflen < 0)
>+ b->len -= namelen;
>+ if (b->len < 0)
> return -ENAMETOOLONG;
>- *buffer -= namelen;
>- memcpy(*buffer, str, namelen);
>+ b->ptr -= namelen;
>+ memcpy(b->ptr, str, namelen);
> return 0;
> }
>
>@@ -35,16 +40,16 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen)
> *
> * Load acquire is needed to make sure that we see that terminating NUL.
> */
>-static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
>+static int prepend_name(struct prepend_buffer *b, const struct qstr *name)
> {
> const char *dname = smp_load_acquire(&name->name); /* ^^^ */
> u32 dlen = READ_ONCE(name->len);
> char *p;
>
>- *buflen -= dlen + 1;
>- if (*buflen < 0)
>+ b->len -= dlen + 1;
>+ if (b->len < 0)
> return -ENAMETOOLONG;
>- p = *buffer -= dlen + 1;
>+ p = b->ptr -= dlen + 1;
> *p++ = '/';
> while (dlen--) {
> char c = *dname++;
>@@ -55,6 +60,50 @@ static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
> return 0;
> }
>
>+static inline int prepend_entries(struct prepend_buffer *b, const struct path *path, const struct path *root, struct mount *mnt)
>+{
>+ struct dentry *dentry = path->dentry;
>+ struct vfsmount *vfsmnt = path->mnt;
>+
>+ while (dentry != root->dentry || vfsmnt != root->mnt) {
>+ int error;
>+ struct dentry * parent;
>+
>+ if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
>+ struct mount *parent = READ_ONCE(mnt->mnt_parent);
>+ struct mnt_namespace *mnt_ns;
>+
>+ /* Escaped? */
>+ if (dentry != vfsmnt->mnt_root)
>+ return 3;
>+
>+ /* Global root? */
>+ if (mnt != parent) {
>+ dentry = READ_ONCE(mnt->mnt_mountpoint);
>+ mnt = parent;
>+ vfsmnt = &mnt->mnt;
>+ continue;
>+ }
>+ mnt_ns = READ_ONCE(mnt->mnt_ns);
>+ /* open-coded is_mounted() to use local mnt_ns */
>+ if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
>+ return 1; // absolute root
>+
>+ return 2; // detached or not attached yet
>+ break;
>+ }
>+ parent = dentry->d_parent;
>+ prefetch(parent);
>+ error = prepend_name(b, &dentry->d_name);
>+ if (error)
>+ break;
>+
>+ dentry = parent;
>+ }
>+ return 0;
>+}
>+
>+
> /**
> * prepend_path - Prepend path string to a buffer
> * @path: the dentry/vfsmount to report
>@@ -74,15 +123,12 @@ static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
> */
> static int prepend_path(const struct path *path,
> const struct path *root,
>- char **buffer, int *buflen)
>+ struct prepend_buffer *orig)
> {
>- struct dentry *dentry;
>- struct vfsmount *vfsmnt;
> struct mount *mnt;
> int error = 0;
> unsigned seq, m_seq = 0;
>- char *bptr;
>- int blen;
>+ struct prepend_buffer b;
>
> rcu_read_lock();
> restart_mnt:
>@@ -90,50 +136,12 @@ static int prepend_path(const struct path *path,
> seq = 0;
> rcu_read_lock();
> restart:
>- bptr = *buffer;
>- blen = *buflen;
>- error = 0;
>- dentry = path->dentry;
>- vfsmnt = path->mnt;
>- mnt = real_mount(vfsmnt);
>+ b = *orig;
>+ mnt = real_mount(path->mnt);
> read_seqbegin_or_lock(&rename_lock, &seq);
>- while (dentry != root->dentry || vfsmnt != root->mnt) {
>- struct dentry * parent;
>
>- if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
>- struct mount *parent = READ_ONCE(mnt->mnt_parent);
>- struct mnt_namespace *mnt_ns;
>+ error = prepend_entries(&b, path, root, mnt);
>
>- /* Escaped? */
>- if (dentry != vfsmnt->mnt_root) {
>- bptr = *buffer;
>- blen = *buflen;
>- error = 3;
>- break;
>- }
>- /* Global root? */
>- if (mnt != parent) {
>- dentry = READ_ONCE(mnt->mnt_mountpoint);
>- mnt = parent;
>- vfsmnt = &mnt->mnt;
>- continue;
>- }
>- mnt_ns = READ_ONCE(mnt->mnt_ns);
>- /* open-coded is_mounted() to use local mnt_ns */
>- if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
>- error = 1; // absolute root
>- else
>- error = 2; // detached or not attached yet
>- break;
>- }
>- parent = dentry->d_parent;
>- prefetch(parent);
>- error = prepend_name(&bptr, &blen, &dentry->d_name);
>- if (error)
>- break;
>-
>- dentry = parent;
>- }
> if (!(seq & 1))
> rcu_read_unlock();
> if (need_seqretry(&rename_lock, seq)) {
>@@ -150,14 +158,17 @@ static int prepend_path(const struct path *path,
> }
> done_seqretry(&mount_lock, m_seq);
>
>- if (error >= 0 && bptr == *buffer) {
>- if (--blen < 0)
>+ // Escaped? No path
>+ if (error == 3)
>+ b = *orig;
>+
>+ if (error >= 0 && b.ptr == orig->ptr) {
>+ if (--b.len < 0)
> error = -ENAMETOOLONG;
> else
>- *--bptr = '/';
>+ *--b.ptr = '/';
> }
>- *buffer = bptr;
>- *buflen = blen;
>+ *orig = b;
> return error;
> }
>
>@@ -181,34 +192,34 @@ char *__d_path(const struct path *path,
> const struct path *root,
> char *buf, int buflen)
> {
>- char *res = buf + buflen;
>+ struct prepend_buffer b = { buf + buflen, buflen };
> int error;
>
>- prepend(&res, &buflen, "\0", 1);
>- error = prepend_path(path, root, &res, &buflen);
>+ prepend(&b, "\0", 1);
>+ error = prepend_path(path, root, &b);
>
> if (error < 0)
> return ERR_PTR(error);
> if (error > 0)
> return NULL;
>- return res;
>+ return b.ptr;
> }
>
> char *d_absolute_path(const struct path *path,
> char *buf, int buflen)
> {
> struct path root = {};
>- char *res = buf + buflen;
>+ struct prepend_buffer b = { buf + buflen, buflen };
> int error;
>
>- prepend(&res, &buflen, "\0", 1);
>- error = prepend_path(path, &root, &res, &buflen);
>+ prepend(&b, "\0", 1);
>+ error = prepend_path(path, &root, &b);
>
> if (error > 1)
> error = -EINVAL;
> if (error < 0)
> return ERR_PTR(error);
>- return res;
>+ return b.ptr;
> }
>
> /*
>@@ -216,21 +227,21 @@ char *d_absolute_path(const struct path *path,
> */
> static int path_with_deleted(const struct path *path,
> const struct path *root,
>- char **buf, int *buflen)
>+ struct prepend_buffer *b)
> {
>- prepend(buf, buflen, "\0", 1);
>+ prepend(b, "\0", 1);
> if (d_unlinked(path->dentry)) {
>- int error = prepend(buf, buflen, " (deleted)", 10);
>+ int error = prepend(b, " (deleted)", 10);
> if (error)
> return error;
> }
>
>- return prepend_path(path, root, buf, buflen);
>+ return prepend_path(path, root, b);
> }
>
>-static int prepend_unreachable(char **buffer, int *buflen)
>+static int prepend_unreachable(struct prepend_buffer *b)
> {
>- return prepend(buffer, buflen, "(unreachable)", 13);
>+ return prepend(b, "(unreachable)", 13);
> }
>
> static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
>@@ -261,7 +272,7 @@ static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
> */
> char *d_path(const struct path *path, char *buf, int buflen)
> {
>- char *res = buf + buflen;
>+ struct prepend_buffer b = { buf + buflen, buflen };
> struct path root;
> int error;
>
>@@ -282,12 +293,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
>
> rcu_read_lock();
> get_fs_root_rcu(current->fs, &root);
>- error = path_with_deleted(path, &root, &res, &buflen);
>+ error = path_with_deleted(path, &root, &b);
> rcu_read_unlock();
>
> if (error < 0)
>- res = ERR_PTR(error);
>- return res;
>+ return ERR_PTR(error);
>+ return b.ptr;
> }
> EXPORT_SYMBOL(d_path);
>
>@@ -314,13 +325,14 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
>
> char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
> {
>- char *end = buffer + buflen;
>+ struct prepend_buffer b = { buffer + buflen, buflen };
>+
> /* these dentries are never renamed, so d_lock is not needed */
>- if (prepend(&end, &buflen, " (deleted)", 11) ||
>- prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
>- prepend(&end, &buflen, "/", 1))
>- end = ERR_PTR(-ENAMETOOLONG);
>- return end;
>+ if (prepend(&b, " (deleted)", 11) ||
>+ prepend(&b, dentry->d_name.name, dentry->d_name.len) ||
>+ prepend(&b, "/", 1))
>+ return ERR_PTR(-ENAMETOOLONG);
>+ return b.ptr;
> }
>
> /*
>@@ -329,8 +341,9 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
> static char *__dentry_path(const struct dentry *d, char *buf, int buflen)
> {
> const struct dentry *dentry;
>- char *end, *retval;
>- int len, seq = 0;
>+ struct prepend_buffer b;
>+ char *retval;
>+ int seq = 0;
> int error = 0;
>
> if (buflen < 2)
>@@ -339,22 +352,22 @@ static char *__dentry_path(const struct dentry *d, char *buf, int buflen)
> rcu_read_lock();
> restart:
> dentry = d;
>- end = buf + buflen;
>- len = buflen;
>- prepend(&end, &len, "\0", 1);
>+ b.ptr = buf + buflen;
>+ b.len = buflen;
>+ prepend(&b, "\0", 1);
> /* Get '/' right */
>- retval = end-1;
>+ retval = b.ptr-1;
> *retval = '/';
> read_seqbegin_or_lock(&rename_lock, &seq);
> while (!IS_ROOT(dentry)) {
> const struct dentry *parent = dentry->d_parent;
>
> prefetch(parent);
>- error = prepend_name(&end, &len, &dentry->d_name);
>+ error = prepend_name(&b, &dentry->d_name);
> if (error)
> break;
>
>- retval = end;
>+ retval = b.ptr;
> dentry = parent;
> }
> if (!(seq & 1))
>@@ -379,16 +392,23 @@ EXPORT_SYMBOL(dentry_path_raw);
>
> char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
> {
>- char *p = NULL;
>+ struct prepend_buffer b = { buf + buflen, buflen };
> char *retval;
>+ char *p = NULL;
>
> if (d_unlinked(dentry)) {
>- p = buf + buflen;
>- if (prepend(&p, &buflen, "//deleted", 10) != 0)
>+ if (prepend(&b, "//deleted", 10) != 0)
> goto Elong;
>- buflen++;
>+
>+ // save away beginning of "//deleted" string
>+ // and let "__dentry_path()" overwrite one byte
>+ // with the terminating NUL that we'll restore
>+ // below.
>+ p = b.ptr;
>+ b.ptr++;
>+ b.len++;
> }
>- retval = __dentry_path(dentry, buf, buflen);
>+ retval = __dentry_path(dentry, b.ptr, b.len);

I didn't quite understand the logic here. Seems it is not equal to
the previous. Should it be s/b.ptr/buf here? Otherwise, in __dentry_path,
it will use the range [b.ptr, b.ptr+b.len] instead of [buf, buf+b.len].
Am I missing anything here?

--
Cheers,
Justin (Jia He)

> if (!IS_ERR(retval) && p)
> *p = '/'; /* restore '/' overriden with '\0' */
> return retval;
>@@ -441,11 +461,10 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
> error = -ENOENT;
> if (!d_unlinked(pwd.dentry)) {
> unsigned long len;
>- char *cwd = page + PATH_MAX;
>- int buflen = PATH_MAX;
>+ struct prepend_buffer b = { page + PATH_MAX, PATH_MAX };
>
>- prepend(&cwd, &buflen, "\0", 1);
>- error = prepend_path(&pwd, &root, &cwd, &buflen);
>+ prepend(&b, "\0", 1);
>+ error = prepend_path(&pwd, &root, &b);
> rcu_read_unlock();
>
> if (error < 0)
>@@ -453,16 +472,16 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
>
> /* Unreachable from current root */
> if (error > 0) {
>- error = prepend_unreachable(&cwd, &buflen);
>+ error = prepend_unreachable(&b);
> if (error)
> goto out;
> }
>
> error = -ERANGE;
>- len = PATH_MAX + page - cwd;
>+ len = PATH_MAX + page - b.ptr;
> if (len <= size) {
> error = len;
>- if (copy_to_user(buf, cwd, len))
>+ if (copy_to_user(buf, b.ptr, len))
> error = -EFAULT;
> }
> } else {
>
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.