Re: [RFC v3] net/core: add optional threading for backlog processing

From: Felix Fietkau
Date: Sun Feb 19 2023 - 10:08:34 EST


On 19.02.23 14:10, Felix Fietkau wrote:
When dealing with few flows or an imbalance on CPU utilization, static RPS
CPU assignment can be too inflexible. Add support for enabling threaded NAPI
for backlog processing in order to allow the scheduler to better balance
processing. This helps better spread the load across idle CPUs.

Signed-off-by: Felix Fietkau <nbd@xxxxxxxx>
---
RFC v3:
- make patch more generic, applies to backlog processing in general
- fix process queue access on flush
RFC v2:
- fix rebase error in rps locking

include/linux/netdevice.h | 2 +
net/core/dev.c | 78 +++++++++++++++++++++++++++++++++++---
net/core/sysctl_net_core.c | 27 +++++++++++++
3 files changed, 102 insertions(+), 5 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d9cdbc047b49..b3cef91b1696 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -522,6 +522,7 @@ static inline bool napi_complete(struct napi_struct *n)
}
int dev_set_threaded(struct net_device *dev, bool threaded);
+int backlog_set_threaded(bool threaded);
/**
* napi_disable - prevent NAPI from scheduling
@@ -3192,6 +3193,7 @@ struct softnet_data {
unsigned int cpu;
unsigned int input_queue_tail;
#endif
+ unsigned int process_queue_empty;
unsigned int received_rps;
unsigned int dropped;
struct sk_buff_head input_pkt_queue;
diff --git a/net/core/dev.c b/net/core/dev.c
index 357081b0113c..76874513b7b5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4597,7 +4597,7 @@ static int napi_schedule_rps(struct softnet_data *sd)
struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
#ifdef CONFIG_RPS
- if (sd != mysd) {
+ if (sd != mysd && !test_bit(NAPI_STATE_THREADED, &sd->backlog.state)) {
sd->rps_ipi_next = mysd->rps_ipi_list;
mysd->rps_ipi_list = sd;
@@ -5778,6 +5778,8 @@ static DEFINE_PER_CPU(struct work_struct, flush_works);
/* Network device is going away, flush any packets still pending */
static void flush_backlog(struct work_struct *work)
{
+ unsigned int process_queue_empty;
+ bool threaded, flush_processq;
struct sk_buff *skb, *tmp;
struct softnet_data *sd;
@@ -5792,8 +5794,15 @@ static void flush_backlog(struct work_struct *work)
input_queue_head_incr(sd);
}
}
+
+ threaded = test_bit(NAPI_STATE_THREADED, &sd->backlog.state);
+ flush_processq = threaded &&
+ !skb_queue_empty_lockless(&sd->process_queue);
Sorry, the patch was missing these lines (they belong in the hunk above, after the flush_processq assignment):
+	if (flush_processq)
+		process_queue_empty = sd->process_queue_empty;

rps_unlock_irq_enable(sd);
+ if (threaded)
+ goto out;
+
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);