Re: [PATCH bpf-next 1/9] bpf: extend cgroup bpf core to allow multiple cgroup storage types

From: Song Liu
Date: Tue Sep 25 2018 - 01:56:39 EST


On Fri, Sep 21, 2018 at 10:17 AM Roman Gushchin <guro@xxxxxx> wrote:
>
> In order to introduce per-cpu cgroup storage, let's generalize
> the bpf cgroup core to support multiple cgroup storage types.
> Potentially, per-node cgroup storage can be added later.
>
> This commit is mostly a formal change that replaces the
> cgroup_storage pointer with an array of cgroup_storage pointers.
> It doesn't actually introduce a new storage type;
> that will be done later.
>
> Each bpf program is now able to have one cgroup storage of each type.
>
> Signed-off-by: Roman Gushchin <guro@xxxxxx>
> Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
> Cc: Alexei Starovoitov <ast@xxxxxxxxxx>

Acked-by: Song Liu <songliubraving@xxxxxx>

> ---
> include/linux/bpf-cgroup.h | 38 ++++++++++++++------
> include/linux/bpf.h | 11 ++++--
> kernel/bpf/cgroup.c | 74 ++++++++++++++++++++++++++------------
> kernel/bpf/helpers.c | 15 ++++----
> kernel/bpf/local_storage.c | 18 ++++++----
> kernel/bpf/syscall.c | 9 +++--
> kernel/bpf/verifier.c | 8 +++--
> net/bpf/test_run.c | 20 +++++++----
> 8 files changed, 136 insertions(+), 57 deletions(-)
>
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index f91b0f8ff3a9..e9871b012dac 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -2,6 +2,7 @@
> #ifndef _BPF_CGROUP_H
> #define _BPF_CGROUP_H
>
> +#include <linux/bpf.h>
> #include <linux/errno.h>
> #include <linux/jump_label.h>
> #include <linux/percpu.h>
> @@ -22,7 +23,10 @@ struct bpf_cgroup_storage;
> extern struct static_key_false cgroup_bpf_enabled_key;
> #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
>
> -DECLARE_PER_CPU(void*, bpf_cgroup_storage);
> +DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
> +
> +#define for_each_cgroup_storage_type(stype) \
> + for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
>
> struct bpf_cgroup_storage_map;
>
> @@ -43,7 +47,7 @@ struct bpf_cgroup_storage {
> struct bpf_prog_list {
> struct list_head node;
> struct bpf_prog *prog;
> - struct bpf_cgroup_storage *storage;
> + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
> };
>
> struct bpf_prog_array;
> @@ -101,18 +105,29 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
> int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
> short access, enum bpf_attach_type type);
>
> -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
> +static inline enum bpf_cgroup_storage_type cgroup_storage_type(
> + struct bpf_map *map)
> {
> + return BPF_CGROUP_STORAGE_SHARED;
> +}
> +
> +static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
> + *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
> +{
> + enum bpf_cgroup_storage_type stype;
> struct bpf_storage_buffer *buf;
>
> - if (!storage)
> - return;
> + for_each_cgroup_storage_type(stype) {
> + if (!storage[stype])
> + continue;
>
> - buf = READ_ONCE(storage->buf);
> - this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
> + buf = READ_ONCE(storage[stype]->buf);
> + this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]);
> + }
> }
>
> -struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
> +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
> + enum bpf_cgroup_storage_type stype);
> void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
> void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
> struct cgroup *cgroup,
> @@ -265,13 +280,14 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
> return -EINVAL;
> }
>
> -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
> +static inline void bpf_cgroup_storage_set(
> + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
> static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
> struct bpf_map *map) { return 0; }
> static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
> struct bpf_map *map) {}
> static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
> - struct bpf_prog *prog) { return 0; }
> + struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
> static inline void bpf_cgroup_storage_free(
> struct bpf_cgroup_storage *storage) {}
>
> @@ -293,6 +309,8 @@ static inline void bpf_cgroup_storage_free(
> #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
> #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
>
> +#define for_each_cgroup_storage_type(stype) for (; false; )
> +
> #endif /* CONFIG_CGROUP_BPF */
>
> #endif /* _BPF_CGROUP_H */
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 988a00797bcd..b457fbe7b70b 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -272,6 +272,13 @@ struct bpf_prog_offload {
> u32 jited_len;
> };
>
> +enum bpf_cgroup_storage_type {
> + BPF_CGROUP_STORAGE_SHARED,
> + __BPF_CGROUP_STORAGE_MAX
> +};
> +
> +#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
> +
> struct bpf_prog_aux {
> atomic_t refcnt;
> u32 used_map_cnt;
> @@ -289,7 +296,7 @@ struct bpf_prog_aux {
> struct bpf_prog *prog;
> struct user_struct *user;
> u64 load_time; /* ns since boottime */
> - struct bpf_map *cgroup_storage;
> + struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
> char name[BPF_OBJ_NAME_LEN];
> #ifdef CONFIG_SECURITY
> void *security;
> @@ -358,7 +365,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> */
> struct bpf_prog_array_item {
> struct bpf_prog *prog;
> - struct bpf_cgroup_storage *cgroup_storage;
> + struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
> };
>
> struct bpf_prog_array {
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 6a7d931bbc55..065c3d9ff8eb 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -25,6 +25,7 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
> */
> void cgroup_bpf_put(struct cgroup *cgrp)
> {
> + enum bpf_cgroup_storage_type stype;
> unsigned int type;
>
> for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
> @@ -34,8 +35,10 @@ void cgroup_bpf_put(struct cgroup *cgrp)
> list_for_each_entry_safe(pl, tmp, progs, node) {
> list_del(&pl->node);
> bpf_prog_put(pl->prog);
> - bpf_cgroup_storage_unlink(pl->storage);
> - bpf_cgroup_storage_free(pl->storage);
> + for_each_cgroup_storage_type(stype) {
> + bpf_cgroup_storage_unlink(pl->storage[stype]);
> + bpf_cgroup_storage_free(pl->storage[stype]);
> + }
> kfree(pl);
> static_branch_dec(&cgroup_bpf_enabled_key);
> }
> @@ -97,6 +100,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
> enum bpf_attach_type type,
> struct bpf_prog_array __rcu **array)
> {
> + enum bpf_cgroup_storage_type stype;
> struct bpf_prog_array *progs;
> struct bpf_prog_list *pl;
> struct cgroup *p = cgrp;
> @@ -125,7 +129,9 @@ static int compute_effective_progs(struct cgroup *cgrp,
> continue;
>
> progs->items[cnt].prog = pl->prog;
> - progs->items[cnt].cgroup_storage = pl->storage;
> + for_each_cgroup_storage_type(stype)
> + progs->items[cnt].cgroup_storage[stype] =
> + pl->storage[stype];
> cnt++;
> }
> } while ((p = cgroup_parent(p)));
> @@ -232,7 +238,9 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
> {
> struct list_head *progs = &cgrp->bpf.progs[type];
> struct bpf_prog *old_prog = NULL;
> - struct bpf_cgroup_storage *storage, *old_storage = NULL;
> + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
> + *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
> + enum bpf_cgroup_storage_type stype;
> struct bpf_prog_list *pl;
> bool pl_was_allocated;
> int err;
> @@ -254,34 +262,44 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
> if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
> return -E2BIG;
>
> - storage = bpf_cgroup_storage_alloc(prog);
> - if (IS_ERR(storage))
> - return -ENOMEM;
> + for_each_cgroup_storage_type(stype) {
> + storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
> + if (IS_ERR(storage[stype])) {
> + storage[stype] = NULL;
> + for_each_cgroup_storage_type(stype)
> + bpf_cgroup_storage_free(storage[stype]);
> + return -ENOMEM;
> + }
> + }
>
> if (flags & BPF_F_ALLOW_MULTI) {
> list_for_each_entry(pl, progs, node) {
> if (pl->prog == prog) {
> /* disallow attaching the same prog twice */
> - bpf_cgroup_storage_free(storage);
> + for_each_cgroup_storage_type(stype)
> + bpf_cgroup_storage_free(storage[stype]);
> return -EINVAL;
> }
> }
>
> pl = kmalloc(sizeof(*pl), GFP_KERNEL);
> if (!pl) {
> - bpf_cgroup_storage_free(storage);
> + for_each_cgroup_storage_type(stype)
> + bpf_cgroup_storage_free(storage[stype]);
> return -ENOMEM;
> }
>
> pl_was_allocated = true;
> pl->prog = prog;
> - pl->storage = storage;
> + for_each_cgroup_storage_type(stype)
> + pl->storage[stype] = storage[stype];
> list_add_tail(&pl->node, progs);
> } else {
> if (list_empty(progs)) {
> pl = kmalloc(sizeof(*pl), GFP_KERNEL);
> if (!pl) {
> - bpf_cgroup_storage_free(storage);
> + for_each_cgroup_storage_type(stype)
> + bpf_cgroup_storage_free(storage[stype]);
> return -ENOMEM;
> }
> pl_was_allocated = true;
> @@ -289,12 +307,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
> } else {
> pl = list_first_entry(progs, typeof(*pl), node);
> old_prog = pl->prog;
> - old_storage = pl->storage;
> - bpf_cgroup_storage_unlink(old_storage);
> + for_each_cgroup_storage_type(stype) {
> + old_storage[stype] = pl->storage[stype];
> + bpf_cgroup_storage_unlink(old_storage[stype]);
> + }
> pl_was_allocated = false;
> }
> pl->prog = prog;
> - pl->storage = storage;
> + for_each_cgroup_storage_type(stype)
> + pl->storage[stype] = storage[stype];
> }
>
> cgrp->bpf.flags[type] = flags;
> @@ -304,21 +325,27 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
> goto cleanup;
>
> static_branch_inc(&cgroup_bpf_enabled_key);
> - if (old_storage)
> - bpf_cgroup_storage_free(old_storage);
> + for_each_cgroup_storage_type(stype) {
> + if (!old_storage[stype])
> + continue;
> + bpf_cgroup_storage_free(old_storage[stype]);
> + }
> if (old_prog) {
> bpf_prog_put(old_prog);
> static_branch_dec(&cgroup_bpf_enabled_key);
> }
> - bpf_cgroup_storage_link(storage, cgrp, type);
> + for_each_cgroup_storage_type(stype)
> + bpf_cgroup_storage_link(storage[stype], cgrp, type);
> return 0;
>
> cleanup:
> /* and cleanup the prog list */
> pl->prog = old_prog;
> - bpf_cgroup_storage_free(pl->storage);
> - pl->storage = old_storage;
> - bpf_cgroup_storage_link(old_storage, cgrp, type);
> + for_each_cgroup_storage_type(stype) {
> + bpf_cgroup_storage_free(pl->storage[stype]);
> + pl->storage[stype] = old_storage[stype];
> + bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
> + }
> if (pl_was_allocated) {
> list_del(&pl->node);
> kfree(pl);
> @@ -339,6 +366,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
> enum bpf_attach_type type, u32 unused_flags)
> {
> struct list_head *progs = &cgrp->bpf.progs[type];
> + enum bpf_cgroup_storage_type stype;
> u32 flags = cgrp->bpf.flags[type];
> struct bpf_prog *old_prog = NULL;
> struct bpf_prog_list *pl;
> @@ -385,8 +413,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
>
> /* now can actually delete it from this cgroup list */
> list_del(&pl->node);
> - bpf_cgroup_storage_unlink(pl->storage);
> - bpf_cgroup_storage_free(pl->storage);
> + for_each_cgroup_storage_type(stype) {
> + bpf_cgroup_storage_unlink(pl->storage[stype]);
> + bpf_cgroup_storage_free(pl->storage[stype]);
> + }
> kfree(pl);
> if (list_empty(progs))
> /* last program was detached, reset flags to zero */
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 1991466b8327..9070b2ace6aa 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -194,16 +194,18 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
> .ret_type = RET_INTEGER,
> };
>
> -DECLARE_PER_CPU(void*, bpf_cgroup_storage);
> +#ifdef CONFIG_CGROUP_BPF
> +DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
>
> BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
> {
> - /* map and flags arguments are not used now,
> - * but provide an ability to extend the API
> - * for other types of local storages.
> - * verifier checks that their values are correct.
> + /* flags argument is not used now,
> + * but provides an ability to extend the API.
> + * verifier checks that its value is correct.
> */
> - return (unsigned long) this_cpu_read(bpf_cgroup_storage);
> + enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
> +
> + return (unsigned long) this_cpu_read(bpf_cgroup_storage[stype]);
> }
>
> const struct bpf_func_proto bpf_get_local_storage_proto = {
> @@ -214,3 +216,4 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
> .arg2_type = ARG_ANYTHING,
> };
> #endif
> +#endif
> diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
> index 22ad967d1e5f..0bd9f19fc557 100644
> --- a/kernel/bpf/local_storage.c
> +++ b/kernel/bpf/local_storage.c
> @@ -7,7 +7,7 @@
> #include <linux/rbtree.h>
> #include <linux/slab.h>
>
> -DEFINE_PER_CPU(void*, bpf_cgroup_storage);
> +DEFINE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
>
> #ifdef CONFIG_CGROUP_BPF
>
> @@ -251,6 +251,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
>
> int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
> {
> + enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
> struct bpf_cgroup_storage_map *map = map_to_storage(_map);
> int ret = -EBUSY;
>
> @@ -258,11 +259,12 @@ int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
>
> if (map->prog && map->prog != prog)
> goto unlock;
> - if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
> + if (prog->aux->cgroup_storage[stype] &&
> + prog->aux->cgroup_storage[stype] != _map)
> goto unlock;
>
> map->prog = prog;
> - prog->aux->cgroup_storage = _map;
> + prog->aux->cgroup_storage[stype] = _map;
> ret = 0;
> unlock:
> spin_unlock_bh(&map->lock);
> @@ -272,24 +274,26 @@ int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
>
> void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
> {
> + enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
> struct bpf_cgroup_storage_map *map = map_to_storage(_map);
>
> spin_lock_bh(&map->lock);
> if (map->prog == prog) {
> - WARN_ON(prog->aux->cgroup_storage != _map);
> + WARN_ON(prog->aux->cgroup_storage[stype] != _map);
> map->prog = NULL;
> - prog->aux->cgroup_storage = NULL;
> + prog->aux->cgroup_storage[stype] = NULL;
> }
> spin_unlock_bh(&map->lock);
> }
>
> -struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
> +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
> + enum bpf_cgroup_storage_type stype)
> {
> struct bpf_cgroup_storage *storage;
> struct bpf_map *map;
> u32 pages;
>
> - map = prog->aux->cgroup_storage;
> + map = prog->aux->cgroup_storage[stype];
> if (!map)
> return NULL;
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index b3c2d09bcf7a..8c91d2b41b1e 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -988,10 +988,15 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
> /* drop refcnt on maps used by eBPF program and free auxilary data */
> static void free_used_maps(struct bpf_prog_aux *aux)
> {
> + enum bpf_cgroup_storage_type stype;
> int i;
>
> - if (aux->cgroup_storage)
> - bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
> + for_each_cgroup_storage_type(stype) {
> + if (!aux->cgroup_storage[stype])
> + continue;
> + bpf_cgroup_storage_release(aux->prog,
> + aux->cgroup_storage[stype]);
> + }
>
> for (i = 0; i < aux->used_map_cnt; i++)
> bpf_map_put(aux->used_maps[i]);
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 8ccbff4fff93..e75f36de91d6 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -5171,11 +5171,15 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
> /* drop refcnt of maps used by the rejected program */
> static void release_maps(struct bpf_verifier_env *env)
> {
> + enum bpf_cgroup_storage_type stype;
> int i;
>
> - if (env->prog->aux->cgroup_storage)
> + for_each_cgroup_storage_type(stype) {
> + if (!env->prog->aux->cgroup_storage[stype])
> + continue;
> bpf_cgroup_storage_release(env->prog,
> - env->prog->aux->cgroup_storage);
> + env->prog->aux->cgroup_storage[stype]);
> + }
>
> for (i = 0; i < env->used_map_cnt; i++)
> bpf_map_put(env->used_maps[i]);
> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> index f4078830ea50..0c423b8cd75c 100644
> --- a/net/bpf/test_run.c
> +++ b/net/bpf/test_run.c
> @@ -12,7 +12,7 @@
> #include <linux/sched/signal.h>
>
> static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
> - struct bpf_cgroup_storage *storage)
> + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
> {
> u32 ret;
>
> @@ -28,13 +28,20 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
>
> static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
> {
> - struct bpf_cgroup_storage *storage = NULL;
> + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
> + enum bpf_cgroup_storage_type stype;
> u64 time_start, time_spent = 0;
> u32 ret = 0, i;
>
> - storage = bpf_cgroup_storage_alloc(prog);
> - if (IS_ERR(storage))
> - return PTR_ERR(storage);
> + for_each_cgroup_storage_type(stype) {
> + storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
> + if (IS_ERR(storage[stype])) {
> + storage[stype] = NULL;
> + for_each_cgroup_storage_type(stype)
> + bpf_cgroup_storage_free(storage[stype]);
> + return -ENOMEM;
> + }
> + }
>
> if (!repeat)
> repeat = 1;
> @@ -53,7 +60,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
> do_div(time_spent, repeat);
> *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
>
> - bpf_cgroup_storage_free(storage);
> + for_each_cgroup_storage_type(stype)
> + bpf_cgroup_storage_free(storage[stype]);
>
> return ret;
> }
> --
> 2.17.1
>