Re: [PATCH bpf-next v8 5/7] bpf: Implement bpf_local_storage for inodes

From: Martin KaFai Lau
Date: Mon Aug 17 2020 - 21:28:40 EST


On Mon, Aug 03, 2020 at 06:46:53PM +0200, KP Singh wrote:
> From: KP Singh <kpsingh@xxxxxxxxxx>
>
> Similar to bpf_local_storage for sockets, add local storage for inodes.
> The life-cycle of storage is managed with the life-cycle of the inode.
> i.e. the storage is destroyed along with the owning inode.
>
> The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the
> security blob which are now stackable and can co-exist with other LSMs.
>
> Signed-off-by: KP Singh <kpsingh@xxxxxxxxxx>
> ---
> include/linux/bpf_local_storage.h | 10 +
> include/linux/bpf_lsm.h | 21 ++
> include/linux/bpf_types.h | 3 +
> include/uapi/linux/bpf.h | 38 +++
> kernel/bpf/Makefile | 1 +
> kernel/bpf/bpf_inode_storage.c | 265 ++++++++++++++++++
> kernel/bpf/syscall.c | 3 +-
> kernel/bpf/verifier.c | 10 +
> security/bpf/hooks.c | 7 +
> .../bpf/bpftool/Documentation/bpftool-map.rst | 2 +-
> tools/bpf/bpftool/bash-completion/bpftool | 3 +-
> tools/bpf/bpftool/map.c | 3 +-
> tools/include/uapi/linux/bpf.h | 38 +++
> tools/lib/bpf/libbpf_probes.c | 5 +-
> 14 files changed, 403 insertions(+), 6 deletions(-)
> create mode 100644 kernel/bpf/bpf_inode_storage.c
>
> diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
> index 3685f681a7fc..75c76847fad5 100644
> --- a/include/linux/bpf_local_storage.h
> +++ b/include/linux/bpf_local_storage.h
> @@ -160,4 +160,14 @@ struct bpf_local_storage_data *
> bpf_local_storage_update(void *owner, struct bpf_map *map, void *value,
> u64 map_flags);
>
> +#ifdef CONFIG_BPF_LSM
> +extern const struct bpf_func_proto bpf_inode_storage_get_proto;
> +extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
> +void bpf_inode_storage_free(struct inode *inode);
> +#else
> +static inline void bpf_inode_storage_free(struct inode *inode)
> +{
> +}
> +#endif /* CONFIG_BPF_LSM */
This is LSM specific. Can they be moved to bpf_lsm.h?

> +
> #endif /* _BPF_LOCAL_STORAGE_H */
> diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
> index af74712af585..d0683ada1e49 100644
> --- a/include/linux/bpf_lsm.h
> +++ b/include/linux/bpf_lsm.h
> @@ -17,9 +17,24 @@
> #include <linux/lsm_hook_defs.h>
> #undef LSM_HOOK
>
> +struct bpf_storage_blob {
> + struct bpf_local_storage __rcu *storage;
> +};
> +
> +extern struct lsm_blob_sizes bpf_lsm_blob_sizes;
> +
> int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
> const struct bpf_prog *prog);
>
> +static inline struct bpf_storage_blob *bpf_inode(
> + const struct inode *inode)
> +{
> + if (unlikely(!inode->i_security))
> + return NULL;
> +
> + return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
> +}
> +
> #else /* !CONFIG_BPF_LSM */
>
> static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
> @@ -28,6 +43,12 @@ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
> return -EOPNOTSUPP;
> }
>
> +static inline struct bpf_storage_blob *bpf_inode(
> + const struct inode *inode)
> +{
> + return NULL;
> +}
> +
> #endif /* CONFIG_BPF_LSM */
>
> #endif /* _LINUX_BPF_LSM_H */

[ ... ]

> diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
> new file mode 100644
> index 000000000000..a51a6328c02d
> --- /dev/null
> +++ b/kernel/bpf/bpf_inode_storage.c
> @@ -0,0 +1,265 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2019 Facebook
> + * Copyright 2020 Google LLC.
> + */
> +
> +#include <linux/rculist.h>
> +#include <linux/list.h>
> +#include <linux/hash.h>
> +#include <linux/types.h>
> +#include <linux/spinlock.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_local_storage.h>
> +#include <net/sock.h>
> +#include <uapi/linux/sock_diag.h>
> +#include <uapi/linux/btf.h>
> +#include <linux/bpf_lsm.h>
> +#include <linux/btf_ids.h>
> +#include <linux/fdtable.h>
> +
> +DEFINE_BPF_STORAGE_CACHE(inode_cache);
> +
> +static struct bpf_local_storage __rcu **
> +inode_storage_ptr(void *owner)
> +{
> + struct inode *inode = owner;
> + struct bpf_storage_blob *bsb;
> + bsb = bpf_inode(inode);
> + if (!bsb)
> + return NULL;
> + return &bsb->storage;
> +}
> +
> +static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
> + struct bpf_map *map,
> + bool cacheit_lockit)
> +{
> + struct bpf_local_storage *inode_storage;
> + struct bpf_local_storage_map *smap;
> + struct bpf_storage_blob *bsb;
> +
> + bsb = bpf_inode(inode);
> + if (!bsb)
> + return ERR_PTR(-ENOENT);
ERR_PTR is returned here...

> +
> + inode_storage = rcu_dereference(bsb->storage);
> + if (!inode_storage)
> + return NULL;
> +
> + smap = (struct bpf_local_storage_map *)map;
> + return bpf_local_storage_lookup(inode_storage, smap, cacheit_lockit);
> +}
> +
> +void bpf_inode_storage_free(struct inode *inode)
> +{
> + struct bpf_local_storage_elem *selem;
> + struct bpf_local_storage *local_storage;
> + bool free_inode_storage = false;
> + struct bpf_storage_blob *bsb;
> + struct hlist_node *n;
> +
> + bsb = bpf_inode(inode);
> + if (!bsb)
> + return;
> +
> + rcu_read_lock();
> +
> + local_storage = rcu_dereference(bsb->storage);
> + if (!local_storage) {
> + rcu_read_unlock();
> + return;
> + }
> +
> + /* Netiher the bpf_prog nor the bpf-map's syscall
> + * could be modifying the local_storage->list now.
> + * Thus, no elem can be added-to or deleted-from the
> + * local_storage->list by the bpf_prog or by the bpf-map's syscall.
> + *
> + * It is racing with bpf_local_storage_map_free() alone
> + * when unlinking elem from the local_storage->list and
> + * the map's bucket->list.
> + */
> + raw_spin_lock_bh(&local_storage->lock);
> + hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
> + /* Always unlink from map before unlinking from
> + * local_storage.
> + */
> + bpf_selem_unlink_map(selem);
> + free_inode_storage =
> + bpf_selem_unlink_storage(local_storage, selem, false);
> + }
> + raw_spin_unlock_bh(&local_storage->lock);
> + rcu_read_unlock();
> +
> + /* free_inoode_storage should always be true as long as
> + * local_storage->list was non-empty.
> + */
> + if (free_inode_storage)
> + kfree_rcu(local_storage, rcu);
> +}
> +
> +
> +static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
> +{
> + struct bpf_local_storage_data *sdata;
> + struct file *f;
> + int fd;
> +
> + fd = *(int *)key;
> + f = fcheck(fd);
> + if (!f)
> + return ERR_PTR(-EINVAL);
> +
> + get_file(f);
> + sdata = inode_storage_lookup(f->f_inode, map, true);
> + fput(f);
> + return sdata ? sdata->data : NULL;
sdata can be ERR_PTR here and a few other cases below.

May be inode_storage_lookup() should just return NULL.

> +}
> +
> +static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
> + void *value, u64 map_flags)
> +{
> + struct bpf_local_storage_data *sdata;
> + struct file *f;
> + int fd;
> +
> + fd = *(int *)key;
> + f = fcheck(fd);
> + if (!f)
> + return -EINVAL;
> +
> + get_file(f);
get_file() does atomic_long_inc() instead of atomic_long_inc_not_zero().
I don't see how that helps here. Am I missing something?

> + sdata = bpf_local_storage_update(f->f_inode, map, value, map_flags);
> + fput(f);
> + return PTR_ERR_OR_ZERO(sdata);
> +}
> +