[PATCH net-next 15/15] net: Move per-CPU flush-lists to bpf_net_context on PREEMPT_RT.

From: Sebastian Andrzej Siewior
Date: Fri May 03 2024 - 14:34:19 EST


The flush lists, which are accessed from within the NAPI callback
(xdp_do_flush() for instance), are per-CPU. They are subject to the
same problem as struct bpf_redirect_info.

Add the per-CPU lists cpu_map_flush_list, dev_map_flush_list and
xskmap_map_flush_list to struct bpf_net_context. Add wrappers for
accessing them.

Cc: "Björn Töpel" <bjorn@xxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Andrii Nakryiko <andrii@xxxxxxxxxx>
Cc: Eduard Zingerman <eddyz87@xxxxxxxxx>
Cc: Hao Luo <haoluo@xxxxxxxxxx>
Cc: Jesper Dangaard Brouer <hawk@xxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: John Fastabend <john.fastabend@xxxxxxxxx>
Cc: Jonathan Lemon <jonathan.lemon@xxxxxxxxx>
Cc: KP Singh <kpsingh@xxxxxxxxxx>
Cc: Maciej Fijalkowski <maciej.fijalkowski@xxxxxxxxx>
Cc: Magnus Karlsson <magnus.karlsson@xxxxxxxxx>
Cc: Martin KaFai Lau <martin.lau@xxxxxxxxx>
Cc: Song Liu <song@xxxxxxxxxx>
Cc: Stanislav Fomichev <sdf@xxxxxxxxxx>
Cc: Toke Høiland-Jørgensen <toke@xxxxxxxxxx>
Cc: Yonghong Song <yonghong.song@xxxxxxxxx>
Cc: bpf@xxxxxxxxxxxxxxx
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
include/linux/filter.h | 38 ++++++++++++++++++++++++++++++++++++++
kernel/bpf/cpumap.c | 24 ++++++++----------------
kernel/bpf/devmap.c | 16 ++++++++--------
net/xdp/xsk.c | 19 +++++++++++--------
4 files changed, 65 insertions(+), 32 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index bdd69bd81df45..68401d84e2050 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -746,6 +746,9 @@ struct bpf_redirect_info {

struct bpf_net_context {
struct bpf_redirect_info ri;
+ struct list_head cpu_map_flush_list;
+ struct list_head dev_map_flush_list;
+ struct list_head xskmap_map_flush_list;
};

#ifndef CONFIG_PREEMPT_RT
@@ -758,6 +761,10 @@ static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bp
ctx = this_cpu_read(bpf_net_context);
if (ctx != NULL)
return NULL;
+ INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list);
+ INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list);
+ INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list);
+
this_cpu_write(bpf_net_context, bpf_net_ctx);
return bpf_net_ctx;
}
@@ -788,6 +795,10 @@ static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bp

if (tsk->bpf_net_context != NULL)
return NULL;
+ INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list);
+ INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list);
+ INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list);
+
tsk->bpf_net_context = bpf_net_ctx;
return bpf_net_ctx;
}
@@ -820,6 +831,33 @@ static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
return &bpf_net_ctx->ri;
}

+static inline struct list_head *bpf_net_ctx_get_cpu_map_flush_list(void)
+{
+ struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+ if (!bpf_net_ctx)
+ return NULL;
+ return &bpf_net_ctx->cpu_map_flush_list;
+}
+
+static inline struct list_head *bpf_net_ctx_get_dev_flush_list(void)
+{
+ struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+ if (!bpf_net_ctx)
+ return NULL;
+ return &bpf_net_ctx->dev_map_flush_list;
+}
+
+static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void)
+{
+ struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+ if (!bpf_net_ctx)
+ return NULL;
+ return &bpf_net_ctx->xskmap_map_flush_list;
+}
+
DEFINE_FREE(bpf_net_ctx_clear, struct bpf_net_context *, if (_T) bpf_net_ctx_clear(_T));

/* flags for bpf_redirect_info kern_flags */
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 66974bd027109..0d18ffc93dcab 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -79,8 +79,6 @@ struct bpf_cpu_map {
struct bpf_cpu_map_entry __rcu **cpu_map;
};

-static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
-
static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
{
u32 value_size = attr->value_size;
@@ -709,7 +707,7 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
*/
static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
{
- struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);

if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
@@ -761,9 +759,12 @@ int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,

void __cpu_map_flush(void)
{
- struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
struct xdp_bulk_queue *bq, *tmp;

+ if (!flush_list)
+ return;
+
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
bq_flush_to_queue(bq);

@@ -775,20 +776,11 @@ void __cpu_map_flush(void)
#ifdef CONFIG_DEBUG_NET
bool cpu_map_check_flush(void)
{
- if (list_empty(this_cpu_ptr(&cpu_map_flush_list)))
+ struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
+
+ if (!flush_list || list_empty(flush_list))
return false;
__cpu_map_flush();
return true;
}
#endif
-
-static int __init cpu_map_init(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu));
- return 0;
-}
-
-subsys_initcall(cpu_map_init);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 4e2cdbb5629f2..03533e45399a0 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -83,7 +83,6 @@ struct bpf_dtab {
u32 n_buckets;
};

-static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);

@@ -408,9 +407,12 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
*/
void __dev_flush(void)
{
- struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
struct xdp_dev_bulk_queue *bq, *tmp;

+ if (!flush_list)
+ return;
+
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
bq_xmit_all(bq, XDP_XMIT_FLUSH);
bq->dev_rx = NULL;
@@ -422,7 +424,9 @@ void __dev_flush(void)
#ifdef CONFIG_DEBUG_NET
bool dev_check_flush(void)
{
- if (list_empty(this_cpu_ptr(&dev_flush_list)))
+ struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
+
+ if (!flush_list || list_empty(flush_list))
return false;
__dev_flush();
return true;
@@ -453,7 +457,7 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx, struct bpf_prog *xdp_prog)
{
- struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);

if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
@@ -1156,15 +1160,11 @@ static struct notifier_block dev_map_notifier = {

static int __init dev_map_init(void)
{
- int cpu;
-
/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
offsetof(struct _bpf_dtab_netdev, dev));
register_netdevice_notifier(&dev_map_notifier);

- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
return 0;
}

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 727aa20be4bde..0ac5c80eef6bf 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -35,8 +35,6 @@
#define TX_BATCH_SIZE 32
#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)

-static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
-
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -375,9 +373,12 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)

int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
int err;

+ if (!flush_list)
+ return -EINVAL;
+
err = xsk_rcv(xs, xdp);
if (err)
return err;
@@ -390,9 +391,11 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)

void __xsk_map_flush(void)
{
- struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+ struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
struct xdp_sock *xs, *tmp;

+ if (!flush_list)
+ return;
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
xsk_flush(xs);
__list_del_clearprev(&xs->flush_node);
@@ -402,7 +405,9 @@ void __xsk_map_flush(void)
#ifdef CONFIG_DEBUG_NET
bool xsk_map_check_flush(void)
{
- if (list_empty(this_cpu_ptr(&xskmap_flush_list)))
+ struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
+
+ if (!flush_list || list_empty(flush_list))
return false;
__xsk_map_flush();
return true;
@@ -1775,7 +1780,7 @@ static struct pernet_operations xsk_net_ops = {

static int __init xsk_init(void)
{
- int err, cpu;
+ int err;

err = proto_register(&xsk_proto, 0 /* no slab */);
if (err)
@@ -1793,8 +1798,6 @@ static int __init xsk_init(void)
if (err)
goto out_pernet;

- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
return 0;

out_pernet:
--
2.43.0