[PATCH net-next] net: sched: remove unnecessay lock protection for skb_bad_txq/gso_skb

From: Yunsheng Lin
Date: Mon Mar 15 2021 - 05:30:12 EST


Currently qdisc_lock(q) is taken before enqueuing and dequeuing
for lockless qdisc's skb_bad_txq/gso_skb queue, qdisc->seqlock is
also taken, which can provide the same protection as qdisc_lock(q).

This patch removes the unnecessay qdisc_lock(q) protection for
lockless qdisc' skb_bad_txq/gso_skb queue.

And dev_reset_queue() takes the qdisc->seqlock for lockless qdisc
besides taking the qdisc_lock(q) when doing the qdisc reset,
some_qdisc_is_busy() takes both qdisc->seqlock and qdisc_lock(q)
when checking qdisc status. It is unnecessary to take both lock
while the fast path only take one lock, so this patch also changes
it to only take qdisc_lock(q) for locked qdisc, and only take
qdisc->seqlock for lockless qdisc.

Since qdisc->seqlock is taken for lockless qdisc when calling
qdisc_is_running() in some_qdisc_is_busy(), use qdisc->running
to decide if the lockless qdisc is running.

Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
---
include/net/sch_generic.h | 2 --
net/sched/sch_generic.c | 72 +++++++++++++----------------------------------
2 files changed, 19 insertions(+), 55 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2d6eb60..0e497ed 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -139,8 +139,6 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)

static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
- if (qdisc->flags & TCQ_F_NOLOCK)
- return spin_is_locked(&qdisc->seqlock);
return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 49eae93..a5f1e3c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -38,7 +38,7 @@ EXPORT_SYMBOL(default_qdisc_ops);
/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
- * qdisc_lock(qdisc) spinlock.
+ * qdisc_lock(qdisc) or qdisc->seqlock spinlock.
*
* The idea is the following:
* - enqueue, dequeue are serialized via qdisc root lock
@@ -51,14 +51,8 @@ EXPORT_SYMBOL(default_qdisc_ops);
static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
{
const struct netdev_queue *txq = q->dev_queue;
- spinlock_t *lock = NULL;
struct sk_buff *skb;

- if (q->flags & TCQ_F_NOLOCK) {
- lock = qdisc_lock(q);
- spin_lock(lock);
- }
-
skb = skb_peek(&q->skb_bad_txq);
if (skb) {
/* check the reason of requeuing without tx lock first */
@@ -77,9 +71,6 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
}
}

- if (lock)
- spin_unlock(lock);
-
return skb;
}

@@ -96,13 +87,6 @@ static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
struct sk_buff *skb)
{
- spinlock_t *lock = NULL;
-
- if (q->flags & TCQ_F_NOLOCK) {
- lock = qdisc_lock(q);
- spin_lock(lock);
- }
-
__skb_queue_tail(&q->skb_bad_txq, skb);

if (qdisc_is_percpu_stats(q)) {
@@ -112,20 +96,10 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
qdisc_qstats_backlog_inc(q, skb);
q->q.qlen++;
}
-
- if (lock)
- spin_unlock(lock);
}

static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
- spinlock_t *lock = NULL;
-
- if (q->flags & TCQ_F_NOLOCK) {
- lock = qdisc_lock(q);
- spin_lock(lock);
- }
-
while (skb) {
struct sk_buff *next = skb->next;

@@ -144,8 +118,7 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)

skb = next;
}
- if (lock)
- spin_unlock(lock);
+
__netif_schedule(q);
}

@@ -207,24 +180,9 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,

*packets = 1;
if (unlikely(!skb_queue_empty(&q->gso_skb))) {
- spinlock_t *lock = NULL;
-
- if (q->flags & TCQ_F_NOLOCK) {
- lock = qdisc_lock(q);
- spin_lock(lock);
- }

skb = skb_peek(&q->gso_skb);

- /* skb may be null if another cpu pulls gso_skb off in between
- * empty check and lock.
- */
- if (!skb) {
- if (lock)
- spin_unlock(lock);
- goto validate;
- }
-
/* skb in gso_skb were already validated */
*validate = false;
if (xfrm_offload(skb))
@@ -243,11 +201,10 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
} else {
skb = NULL;
}
- if (lock)
- spin_unlock(lock);
+
goto trace;
}
-validate:
+
*validate = true;

if ((q->flags & TCQ_F_ONETXQUEUE) &&
@@ -1153,13 +1110,15 @@ static void dev_reset_queue(struct net_device *dev,

if (nolock)
spin_lock_bh(&qdisc->seqlock);
- spin_lock_bh(qdisc_lock(qdisc));
+ else
+ spin_lock_bh(qdisc_lock(qdisc));

qdisc_reset(qdisc);

- spin_unlock_bh(qdisc_lock(qdisc));
if (nolock)
spin_unlock_bh(&qdisc->seqlock);
+ else
+ spin_unlock_bh(qdisc_lock(qdisc));
}

static bool some_qdisc_is_busy(struct net_device *dev)
@@ -1168,20 +1127,27 @@ static bool some_qdisc_is_busy(struct net_device *dev)

for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *dev_queue;
- spinlock_t *root_lock;
struct Qdisc *q;
+ bool nolock;
int val;

dev_queue = netdev_get_tx_queue(dev, i);
q = dev_queue->qdisc_sleeping;

- root_lock = qdisc_lock(q);
- spin_lock_bh(root_lock);
+ nolock = q->flags & TCQ_F_NOLOCK;
+
+ if (nolock)
+ spin_lock_bh(&q->seqlock);
+ else
+ spin_lock_bh(qdisc_lock(q));

val = (qdisc_is_running(q) ||
test_bit(__QDISC_STATE_SCHED, &q->state));

- spin_unlock_bh(root_lock);
+ if (nolock)
+ spin_unlock_bh(&q->seqlock);
+ else
+ spin_unlock_bh(qdisc_lock(q));

if (val)
return true;
--
2.7.4