[RFC PATCH] getting misc stats/attributes via xattr API
From: Miklos Szeredi
Date: Tue May 03 2022 - 08:23:40 EST
This is a simplification of the getvalues(2) prototype and moving it to the
getxattr(2) interface, as suggested by Dave.
The patch itself just adds the possibility to retrieve a single line of
/proc/$$/mountinfo (which was the basic requirement from which the fsinfo
patchset grew out of).
But this should be able to serve Amir's per-sb iostats, as well as a host of
other cases where some statistic needs to be retrieved from some object. Note:
a filesystem object often represents other kinds of objects (such as processes
in /proc) so this is not limited to fs attributes.
This also opens up the interface to setting attributes via setxattr(2).
After some pondering I made the namespace so:
: - root
bar - an attribute
foo: - a folder (can contain attributes and/or folders)
The contents of a folder is represented by a null separated list of names.
Examples:
$ getfattr -etext -n ":" .
# file: .
:="mnt:\000mntns:"
$ getfattr -etext -n ":mnt:" .
# file: .
:mnt:="info"
$ getfattr -etext -n ":mnt:info" .
# file: .
:mnt:info="21 1 254:0 / / rw,relatime - ext4 /dev/root rw\012"
$ getfattr -etext -n ":mntns:" .
# file: .
:mntns:="21:\00022:\00024:\00025:\00023:\00026:\00027:\00028:\00029:\00030:\00031:"
$ getfattr -etext -n ":mntns:28:" .
# file: .
:mntns:28:="info"
Comments?
Thanks,
Miklos
---
fs/Makefile | 2
fs/mount.h | 8 +
fs/namespace.c | 15 ++-
fs/pnode.h | 2
fs/proc_namespace.c | 15 ++-
fs/values.c | 242 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/xattr.c | 16 ++-
include/linux/values.h | 11 ++
8 files changed, 295 insertions(+), 16 deletions(-)
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -16,7 +16,7 @@ obj-y := open.o read_write.o file_table.
pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
fs_types.o fs_context.o fs_parser.o fsopen.o init.o \
- kernel_read_file.o remap_range.o
+ kernel_read_file.o remap_range.o values.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o direct-io.o mpage.o
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -148,3 +148,11 @@ static inline bool is_anon_ns(struct mnt
}
extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
+
+struct mount *mnt_list_next(struct mnt_namespace *ns, struct list_head *p);
+extern void namespace_lock_read(void);
+extern void namespace_unlock_read(void);
+extern int show_mountinfo_root(struct seq_file *m, struct vfsmount *mnt,
+ struct path *root);
+extern bool is_path_reachable(struct mount *, struct dentry *,
+ const struct path *root);
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1332,9 +1332,7 @@ struct vfsmount *mnt_clone_internal(cons
return &p->mnt;
}
-#ifdef CONFIG_PROC_FS
-static struct mount *mnt_list_next(struct mnt_namespace *ns,
- struct list_head *p)
+struct mount *mnt_list_next(struct mnt_namespace *ns, struct list_head *p)
{
struct mount *mnt, *ret = NULL;
@@ -1351,6 +1349,7 @@ static struct mount *mnt_list_next(struc
return ret;
}
+#ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
@@ -1507,6 +1506,16 @@ static inline void namespace_lock(void)
down_write(&namespace_sem);
}
+void namespace_lock_read(void)
+{
+ down_read(&namespace_sem);
+}
+
+void namespace_unlock_read(void)
+{
+ up_read(&namespace_sem);
+}
+
enum umount_tree_flags {
UMOUNT_SYNC = 1,
UMOUNT_PROPAGATE = 2,
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -50,7 +50,5 @@ void mnt_set_mountpoint(struct mount *,
void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
struct mount *mnt);
struct mount *copy_tree(struct mount *, struct dentry *, int);
-bool is_path_reachable(struct mount *, struct dentry *,
- const struct path *root);
int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
#endif /* _LINUX_PNODE_H */
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -132,9 +132,9 @@ static int show_vfsmnt(struct seq_file *
return err;
}
-static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
+int show_mountinfo_root(struct seq_file *m, struct vfsmount *mnt,
+ struct path *root)
{
- struct proc_mounts *p = m->private;
struct mount *r = real_mount(mnt);
struct super_block *sb = mnt->mnt_sb;
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -152,7 +152,7 @@ static int show_mountinfo(struct seq_fil
seq_putc(m, ' ');
/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
- err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+ err = seq_path_root(m, &mnt_path, root, " \t\n\\");
if (err)
goto out;
@@ -164,7 +164,7 @@ static int show_mountinfo(struct seq_fil
seq_printf(m, " shared:%i", r->mnt_group_id);
if (IS_MNT_SLAVE(r)) {
int master = r->mnt_master->mnt_group_id;
- int dom = get_dominating_id(r, &p->root);
+ int dom = get_dominating_id(r, root);
seq_printf(m, " master:%i", master);
if (dom && dom != master)
seq_printf(m, " propagate_from:%i", dom);
@@ -194,6 +194,13 @@ static int show_mountinfo(struct seq_fil
return err;
}
+static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
+{
+ struct proc_mounts *p = m->private;
+
+ return show_mountinfo_root(m, mnt, &p->root);
+}
+
static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
{
struct proc_mounts *p = m->private;
--- /dev/null
+++ b/fs/values.c
@@ -0,0 +1,242 @@
+#include <linux/values.h>
+#include <linux/fs_struct.h>
+#include <linux/seq_file.h>
+#include <linux/nsproxy.h>
+#include "../lib/kstrtox.h"
+#include "mount.h"
+
+struct val_string {
+ const char *str;
+ size_t len;
+};
+
+struct val_iter {
+ struct val_string name;
+ struct seq_file seq;
+ int error;
+};
+
+struct val_desc {
+ struct val_string name;
+ union {
+ u64 idx;
+ int (*get)(struct val_iter *vi, const struct path *path);
+ };
+};
+
+#define VAL_STRING(x) { .str = x, .len = sizeof(x) - 1 }
+#define VD_NAME(x) .name = VAL_STRING(x)
+
+static int val_err(struct val_iter *vi, int err)
+{
+ vi->error = err;
+ return 0;
+}
+
+static int val_end_seq(struct val_iter *vi)
+{
+ if (vi->seq.count == vi->seq.size)
+ return -EOVERFLOW;
+
+ return 0;
+}
+
+static inline void val_string_skip(struct val_string *s, size_t count)
+{
+ WARN_ON(s->len < count);
+ s->str += count;
+ s->len -= count;
+}
+
+static bool val_string_prefix(const struct val_string *p,
+ const struct val_string *s)
+{
+ return s->len >= p->len && !memcmp(s->str, p->str, p->len);
+}
+
+static struct val_desc *val_lookup(struct val_iter *vi, struct val_desc *vd)
+{
+ for (; vd->name.len; vd++) {
+ if (val_string_prefix(&vd->name, &vi->name)) {
+ val_string_skip(&vi->name, vd->name.len);
+ break;
+ }
+ }
+ return vd;
+}
+
+static int val_get_group(struct val_iter *vi, struct val_desc *vd)
+{
+ for (; vd->name.len; vd++)
+ seq_write(&vi->seq, vd->name.str, vd->name.len + 1);
+
+ return val_end_seq(vi);
+}
+
+enum {
+ VAL_MNT_INFO,
+};
+
+static struct val_desc val_mnt_group[] = {
+ { VD_NAME("info"), .idx = VAL_MNT_INFO },
+ { }
+};
+
+static int val_mnt_show(struct val_iter *vi, struct vfsmount *mnt)
+{
+ struct val_desc *vd = val_lookup(vi, val_mnt_group);
+ struct path root;
+
+ if (!vd->name.str)
+ return val_err(vi, -ENOENT);
+
+ switch(vd->idx) {
+ case VAL_MNT_INFO:
+ get_fs_root(current->fs, &root);
+ show_mountinfo_root(&vi->seq, mnt, &root);
+ path_put(&root);
+ break;
+ }
+
+ return 0;
+}
+
+static int val_mnt_get(struct val_iter *vi, const struct path *path)
+{
+ int err;
+
+ if (!vi->name.len)
+ return val_get_group(vi, val_mnt_group);
+
+ namespace_lock_read();
+ err = val_mnt_show(vi, path->mnt);
+ namespace_unlock_read();
+
+ return err;
+}
+
+/* called with namespace_sem held for read */
+static struct vfsmount *mnt_lookup_by_id(struct mnt_namespace *ns,
+ struct path *root, int id)
+{
+ struct mount *m;
+
+ for (m = mnt_list_next(ns, &ns->list); m; m = mnt_list_next(ns, &m->mnt_list)) {
+ if (m->mnt_id == id) {
+ if (is_path_reachable(m, m->mnt.mnt_root, root))
+ return mntget(&m->mnt);
+ else
+ return NULL;
+ }
+ }
+ return NULL;
+}
+
+static void seq_mnt_list(struct seq_file *seq, struct mnt_namespace *ns,
+ struct path *root)
+{
+ struct mount *m;
+
+ namespace_lock_read();
+ for (m = mnt_list_next(ns, &ns->list); m; m = mnt_list_next(ns, &m->mnt_list)) {
+ if (is_path_reachable(m, m->mnt.mnt_root, root)) {
+ seq_printf(seq, "%i:", m->mnt_id);
+ seq_putc(seq, '\0');
+ }
+ }
+ namespace_unlock_read();
+}
+
+static int val_mntns_get(struct val_iter *vi, const struct path *path)
+{
+ struct mnt_namespace *mnt_ns = current->nsproxy->mnt_ns;
+ struct vfsmount *mnt;
+ struct path root;
+ unsigned long long mnt_id;
+ unsigned int end;
+ int err;
+
+ if (!vi->name.len) {
+ get_fs_root(current->fs, &root);
+ seq_mnt_list(&vi->seq, mnt_ns, &root);
+ path_put(&root);
+ return val_end_seq(vi);
+ }
+
+ end = _parse_integer(vi->name.str, 10, &mnt_id);
+ if (end & KSTRTOX_OVERFLOW)
+ return val_err(vi, -ENOENT);
+ if (vi->name.str[end] != VAL_SEP)
+ return val_err(vi, -ENOENT);
+ val_string_skip(&vi->name, end + 1);
+
+ namespace_lock_read();
+ get_fs_root(current->fs, &root);
+ mnt = mnt_lookup_by_id(mnt_ns, &root, mnt_id);
+ path_put(&root);
+ if (!mnt) {
+ namespace_unlock_read();
+ return val_err(vi, -ENOENT);
+ }
+ if (vi->name.len)
+ err = val_mnt_show(vi, mnt);
+ else
+ err = val_get_group(vi, val_mnt_group);
+
+ namespace_unlock_read();
+ mntput(mnt);
+
+ return err;
+}
+
+
+
+static struct val_desc val_toplevel_group[] = {
+ { VD_NAME("mnt:"), .get = val_mnt_get, },
+ { VD_NAME("mntns:"), .get = val_mntns_get, },
+ { },
+};
+
+static int getvalues(struct val_iter *vi, const struct path *path)
+{
+ struct val_desc *vd;
+ int err;
+
+ if (!vi->name.len)
+ return val_get_group(vi, val_toplevel_group);
+
+ vd = val_lookup(vi, val_toplevel_group);
+ if (!vd->name.len)
+ err = val_err(vi, -ENOENT);
+ else
+ err = vd->get(vi, path);
+
+ return err ?: vi->error;
+}
+
+ssize_t val_getxattr(struct path *path, const char *name, size_t namelen,
+ void __user *value, size_t size)
+{
+ int err;
+ char val[1024];
+ struct val_iter vi = {
+ .name = { .str = name, .len = namelen },
+ .seq = { .buf = val, .size = min(sizeof(val), size) },
+ };
+
+ if (!size)
+ return sizeof(val);
+
+ val_string_skip(&vi.name, 1);
+
+ err = getvalues(&vi, path);
+ if (err < 0)
+ return err;
+
+ WARN_ON(vi.seq.count > size);
+ if (copy_to_user(value, vi.seq.buf, vi.seq.count))
+ return -EFAULT;
+
+ return vi.seq.count;
+}
+
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -22,6 +22,7 @@
#include <linux/audit.h>
#include <linux/vmalloc.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/values.h>
#include <linux/uaccess.h>
@@ -643,12 +644,13 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, cons
* Extended attribute GET operations
*/
static ssize_t
-getxattr(struct user_namespace *mnt_userns, struct dentry *d,
- const char __user *name, void __user *value, size_t size)
+getxattr(struct path *path, const char __user *name,
+ void __user *value, size_t size)
{
ssize_t error;
void *kvalue = NULL;
char kname[XATTR_NAME_MAX + 1];
+ struct user_namespace *mnt_userns = mnt_user_ns(path->mnt);
error = strncpy_from_user(kname, name, sizeof(kname));
if (error == 0 || error == sizeof(kname))
@@ -656,6 +658,9 @@ getxattr(struct user_namespace *mnt_user
if (error < 0)
return error;
+ if (kname[0] == VAL_SEP)
+ return val_getxattr(path, kname, error, value, size);
+
if (size) {
if (size > XATTR_SIZE_MAX)
size = XATTR_SIZE_MAX;
@@ -664,7 +669,7 @@ getxattr(struct user_namespace *mnt_user
return -ENOMEM;
}
- error = vfs_getxattr(mnt_userns, d, kname, kvalue, size);
+ error = vfs_getxattr(mnt_userns, path->dentry, kname, kvalue, size);
if (error > 0) {
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
@@ -693,7 +698,7 @@ static ssize_t path_getxattr(const char
error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
if (error)
return error;
- error = getxattr(mnt_user_ns(path.mnt), path.dentry, name, value, size);
+ error = getxattr(&path, name, value, size);
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
@@ -723,8 +728,7 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, cons
if (!f.file)
return error;
audit_file(f.file);
- error = getxattr(file_mnt_user_ns(f.file), f.file->f_path.dentry,
- name, value, size);
+ error = getxattr(&f.file->f_path, name, value, size);
fdput(f);
return error;
}
--- /dev/null
+++ b/include/linux/values.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/types.h>
+
+#define VAL_SEP ':'
+
+struct path;
+
+ssize_t val_getxattr(struct path *path, const char *name, size_t namelen,
+ void __user *value, size_t size);
+