Re: [PATCH] binderfs: implement "max" mount option

From: Christian Brauner
Date: Sun Dec 23 2018 - 09:04:22 EST


On Sat, Dec 22, 2018 at 10:18:06PM +0100, Christian Brauner wrote:
> Since binderfs can be mounted by userns root in non-initial user namespaces
> some precautions are in order. First, a way to set a maximum on the number
> of binder devices that can be allocated per binderfs instance and second, a
> way to reserve a reasonable chunk of binderfs devices for the initial ipc
> namespace.
> A first approach as seen in [1] used sysctls similiar to devpts but was
> shown to be flawed (cf. [2] and [3]) since some aspects were unneeded. This
> is an alternative approach which avoids sysctls completely and instead
> switches to a single mount option.
>
> Starting with this commit binderfs instances can be mounted with a limit on
> the number of binder devices that can be allocated. The max=<count> mount
> option serves as a per-instance limit. If max=<count> is set then only
> <count> number of binder devices can be allocated in this binderfs
> instance.
> Additionally, the binderfs instance in the initial ipc namespace will
> always have a reserve of at least 1024 binder devices unless explicitly
> capped via max=<count>.
>
> This allows to safely bind-mount binderfs instances into unprivileged user
> namespaces since userns root in a non-initial user namespace cannot change
> the mount option as long as it does not own the mount namespace the
> binderfs mount was created in and hence cannot drain the host of minor
> device numbers
>
> [1]: https://lore.kernel.org/lkml/20181221133909.18794-1-christian@xxxxxxxxxx/
> [2]; https://lore.kernel.org/lkml/20181221163316.GA8517@xxxxxxxxx/
> [3]: https://lore.kernel.org/lkml/CAHRSSEx+gDVW4fKKK8oZNAir9G5icJLyodO8hykv3O0O1jt2FQ@xxxxxxxxxxxxxx/
> [4]: https://lore.kernel.org/lkml/20181221192044.5yvfnuri7gdop4rs@xxxxxxxxxx/
>
> Cc: Todd Kjos <tkjos@xxxxxxxxxx>
> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
> Signed-off-by: Christian Brauner <christian.brauner@xxxxxxxxxx>
> ---
> drivers/android/binderfs.c | 109 +++++++++++++++++++++++++++++++++++--
> 1 file changed, 104 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
> index 7496b10532aa..93aee00994d4 100644
> --- a/drivers/android/binderfs.c
> +++ b/drivers/android/binderfs.c
> @@ -20,6 +20,7 @@
> #include <linux/parser.h>
> #include <linux/radix-tree.h>
> #include <linux/sched.h>
> +#include <linux/seq_file.h>
> #include <linux/slab.h>
> #include <linux/spinlock_types.h>
> #include <linux/stddef.h>
> @@ -39,6 +40,11 @@
> #define INODE_OFFSET 3
> #define INTSTRLEN 21
> #define BINDERFS_MAX_MINOR (1U << MINORBITS)
> +/*
> + * Ensure that the initial ipc namespace always has a good chunk of devices
> + * available.
> + */
> +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 1024)
>
> static struct vfsmount *binderfs_mnt;
>
> @@ -46,6 +52,24 @@ static dev_t binderfs_dev;
> static DEFINE_MUTEX(binderfs_minors_mutex);
> static DEFINE_IDA(binderfs_minors);
>
> +/**
> + * binderfs_mount_opts - mount options for binderfs
> + * @max: maximum number of allocatable binderfs binder devices
> + */
> +struct binderfs_mount_opts {
> + int max;
> +};
> +
> +enum {
> + Opt_max,
> + Opt_err
> +};
> +
> +static const match_table_t tokens = {
> + { Opt_max, "max=%d" },
> + { Opt_err, NULL }
> +};
> +
> /**
> * binderfs_info - information about a binderfs mount
> * @ipc_ns: The ipc namespace the binderfs mount belongs to.
> @@ -55,13 +79,16 @@ static DEFINE_IDA(binderfs_minors);
> * created.
> * @root_gid: gid that needs to be used when a new binder device is
> * created.
> + * @mount_opts: The mount options in use.
> + * @device_count: The current number of allocated binder devices.
> */
> struct binderfs_info {
> struct ipc_namespace *ipc_ns;
> struct dentry *control_dentry;
> kuid_t root_uid;
> kgid_t root_gid;
> -
> + struct binderfs_mount_opts mount_opts;
> + atomic_t device_count;
> };
>
> static inline struct binderfs_info *BINDERFS_I(const struct inode *inode)
> @@ -107,13 +134,22 @@ static int binderfs_binder_device_create(struct inode *ref_inode,
> struct inode *inode = NULL;
> struct super_block *sb = ref_inode->i_sb;
> struct binderfs_info *info = sb->s_fs_info;
> + bool use_reserve = (info->ipc_ns == &init_ipc_ns);
>
> /* Reserve new minor number for the new device. */
> mutex_lock(&binderfs_minors_mutex);
> - minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, GFP_KERNEL);
> + if (atomic_inc_return(&info->device_count) < info->mount_opts.max)
> + minor = ida_alloc_max(&binderfs_minors,
> + use_reserve ? BINDERFS_MAX_MINOR :
> + BINDERFS_MAX_MINOR_CAPPED,
> + GFP_KERNEL);
> + else
> + minor = -ENOSPC;
> mutex_unlock(&binderfs_minors_mutex);
> - if (minor < 0)
> + if (minor < 0) {
> + atomic_dec(&info->device_count);
> return minor;
> + }
>
> ret = -ENOMEM;
> device = kzalloc(sizeof(*device), GFP_KERNEL);
> @@ -187,6 +223,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode,
> kfree(name);
> kfree(device);
> mutex_lock(&binderfs_minors_mutex);
> + atomic_dec(&info->device_count);
> ida_free(&binderfs_minors, minor);
> mutex_unlock(&binderfs_minors_mutex);
> iput(inode);
> @@ -232,6 +269,7 @@ static long binder_ctl_ioctl(struct file *file, unsigned int cmd,
> static void binderfs_evict_inode(struct inode *inode)
> {
> struct binder_device *device = inode->i_private;
> + struct binderfs_info *info = BINDERFS_I(inode);
>
> clear_inode(inode);
>
> @@ -239,6 +277,7 @@ static void binderfs_evict_inode(struct inode *inode)
> return;
>
> mutex_lock(&binderfs_minors_mutex);
> + atomic_dec(&info->device_count);
> ida_free(&binderfs_minors, device->miscdev.minor);
> mutex_unlock(&binderfs_minors_mutex);
>
> @@ -246,9 +285,64 @@ static void binderfs_evict_inode(struct inode *inode)
> kfree(device);
> }
>
> +/**
> + * binderfs_parse_mount_opts - parse binderfs mount options
> + * @data: options to set (can be NULL in which case defaults are used)
> + */
> +static int binderfs_parse_mount_opts(char *data,
> + struct binderfs_mount_opts *opts)
> +{
> + char *p;
> + opts->max = BINDERFS_MAX_MINOR;
> +
> + while ((p = strsep(&data, ",")) != NULL) {
> + substring_t args[MAX_OPT_ARGS];
> + int token;
> + int max_devices;
> +
> + if (!*p)
> + continue;
> +
> + token = match_token(p, tokens, args);
> + switch (token) {
> + case Opt_max:
> + max_devices = match_int(&args[0], &max_devices);
> + if (max_devices < 0 || (max_devices > BINDERFS_MAX_MINOR))
> + return -EINVAL;

One should always account for their own stupid mistakes and here's one
I made: the ways match_int() is used here is nonsense since it
overwrites the parsed max_devices value. This should be:

if (match_int(&args[0], &max_devices) ||
(max_devices < 0 ||
(max_devices > BINDERFS_MAX_MINOR)))
return -EINVAL;

Good that we have to have another version anyway. Sorry about that.

Christian

> +
> + opts->max = max_devices;
> + break;
> + default:
> + pr_err("Invalid mount options\n");
> + return -EINVAL;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int binderfs_remount(struct super_block *sb, int *flags, char *data)
> +{
> + struct binderfs_info *info = sb->s_fs_info;
> + return binderfs_parse_mount_opts(data, &info->mount_opts);
> +}
> +
> +static int binderfs_show_mount_opts(struct seq_file *seq, struct dentry *root)
> +{
> + struct binderfs_info *info;
> +
> + info = root->d_sb->s_fs_info;
> + if (info->mount_opts.max < BINDERFS_MAX_MINOR)
> + seq_printf(seq, ",max=%d", info->mount_opts.max);
> +
> + return 0;
> +}
> +
> static const struct super_operations binderfs_super_ops = {
> - .statfs = simple_statfs,
> - .evict_inode = binderfs_evict_inode,
> + .evict_inode = binderfs_evict_inode,
> + .remount_fs = binderfs_remount,
> + .show_options = binderfs_show_mount_opts,
> + .statfs = simple_statfs,
> };
>
> static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry,
> @@ -409,6 +503,10 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent)
> if (!info)
> goto err_without_dentry;
>
> + ret = binderfs_parse_mount_opts(data, &info->mount_opts);
> + if (ret)
> + goto err_without_dentry;
> +
> info->ipc_ns = ipc_ns;
> info->root_gid = make_kgid(sb->s_user_ns, 0);
> if (!gid_valid(info->root_gid))
> @@ -416,6 +514,7 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent)
> info->root_uid = make_kuid(sb->s_user_ns, 0);
> if (!uid_valid(info->root_uid))
> info->root_uid = GLOBAL_ROOT_UID;
> + atomic_set(&info->device_count, 0);
>
> sb->s_fs_info = info;
>
> --
> 2.19.1
>