[PATCH] net: sfc: Fix kernel panic introduced by commit 044588b96372 ("sfc: define inner/outer csum offload TXQ types")

From: Yury Vostrikov
Date: Tue Apr 20 2021 - 16:12:10 EST


NIC has EFX_MAX_CHANNELS channels:
struct efx_nic {
[...]
struct efx_channel *channel[EFX_MAX_CHANNELS];
[...]
}

Each channel has EFX_MAX_TXQ_PER_CHANNEL TX queues; There a reverse
mapping from type to TX queue, holding at most EFX_TXQ_TYPES
entries. This mapping is a bitset mapping because EFX_TXQ_TYPE_* is
defined as non-overlapping bit enum:

struct efx_channel {
[...]
struct efx_tx_queue tx_queue[EFX_MAX_TXQ_PER_CHANNEL];
struct efx_tx_queue *tx_queue_by_type[EFX_TXQ_TYPES];
[...]
}

Because channels and queues are enumerated in-order in
efx_set_channels(), it is possible to get tx_queue be calling

efx_get_tx_queue(efx, qid / EFX_MAX_TXQ_PER_CHANNEL,
qid % EFX_MAX_TXQ_PER_CHANNEL);

This uses qid / EFX_MAX_TXQ_PER_CHANNEL as index inside
efx_nic->channels[] and qid % EFX_MAX_TXQ_PER_CHANNEL as index inside
channel->tx_queue_be_type[].

Indexing into bitset mapping with modulo operation seems to oversight
from the previous refactoring. Comments of other call sites also
indicate that the second argument is indeed queue->label (which is an
index into channel->tx_queue[]), not queue->type. It also looks like
that some callers do need indexing by type, though.

However, because the sizes of tx_queue[] and tx_queue_by_type[] are
equal, and every single slot in both arrays is not equal to NULL, no
crash occurs.

commit 044588b96372 ("sfc: define inner/outer csum offload TXQ types")
add additional TXQ_TYPE and bumps size of tx_queue_by_type to 8
elements. Some of its members are NULL now. During interface shutdown,
tx_queues are flushed; this, in turn, results in a callback to
efx_farch_handle_tx_flush_done, which then tries to use
qid % EFX_MAX_TXQ_PER_CHANNEL as queue->type, gets NULL back, and
crashes.

Address this by adding efx_get_tx_queue_by_type() and updating
relevant callers.

Signed-off-by: Yury Vostrikov <mon@xxxxxxxxxxx>
---
drivers/net/ethernet/sfc/net_driver.h | 21 ++++++++++++++++++---
drivers/net/ethernet/sfc/ptp.c | 2 +-
drivers/net/ethernet/sfc/tx.c | 2 +-
3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 9f7dfdf708cf..4ab0fe21a3a6 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1533,18 +1533,33 @@ static inline unsigned int efx_channel_num_tx_queues(struct efx_channel *channel
}

static inline struct efx_tx_queue *
-efx_channel_get_tx_queue(struct efx_channel *channel, unsigned int type)
+efx_channel_get_tx_queue_by_type(struct efx_channel *channel, unsigned int type)
{
EFX_WARN_ON_ONCE_PARANOID(type >= EFX_TXQ_TYPES);
return channel->tx_queue_by_type[type];
}

static inline struct efx_tx_queue *
-efx_get_tx_queue(struct efx_nic *efx, unsigned int index, unsigned int type)
+efx_get_tx_queue_by_type(struct efx_nic *efx, unsigned int index, unsigned int type)
{
struct efx_channel *channel = efx_get_tx_channel(efx, index);

- return efx_channel_get_tx_queue(channel, type);
+ return efx_channel_get_tx_queue_by_type(channel, type);
+}
+
+static inline struct efx_tx_queue *
+efx_channel_get_tx_queue(struct efx_channel *channel, unsigned int label)
+{
+ EFX_WARN_ON_ONCE_PARANOID(label >= EFX_MAX_TXQ_PER_CHANNEL);
+ return &channel->tx_queue[label];
+}
+
+static inline struct efx_tx_queue *
+efx_get_tx_queue(struct efx_nic *efx, unsigned int index, unsigned int label)
+{
+ struct efx_channel *channel = efx_get_tx_channel(efx, index);
+
+ return efx_channel_get_tx_queue(channel, label);
}

/* Iterate over all TX queues belonging to a channel */
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index a39c5143b386..7de19d22dadc 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1091,7 +1091,7 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
u8 type = efx_tx_csum_type_skb(skb);
struct efx_tx_queue *tx_queue;

- tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
+ tx_queue = efx_channel_get_tx_queue_by_type(ptp_data->channel, type);
if (tx_queue && tx_queue->timestamping) {
efx_enqueue_skb(tx_queue, skb);
} else {
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 1665529a7271..18742db2990d 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -533,7 +533,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
return efx_ptp_tx(efx, skb);
}

- tx_queue = efx_get_tx_queue(efx, index, type);
+ tx_queue = efx_get_tx_queue_by_type(efx, index, type);
if (WARN_ON_ONCE(!tx_queue)) {
/* We don't have a TXQ of the right type.
* This should never happen, as we don't advertise offload
--
2.20.1