[PATCH 2/5] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out

From: Karan Tilak Kumar
Date: Wed Jun 11 2025 - 14:31:21 EST


When both the RHBA and RPA FDMI requests time out, fnic reuses a frame
to send ABTS for each of them. On send completion, this causes an
attempt to free the same frame twice that leads to a crash.

Fix crash by allocating separate frames for RHBA and RPA,
and modify ABTS logic accordingly.

Tested by checking MDS for FDMI information.
Tested by using instrumented driver to:
Drop PLOGI response
Drop RHBA response
Drop RPA response
Drop RHBA and RPA response
Drop PLOGI response + ABTS response
Drop RHBA response + ABTS response
Drop RPA response + ABTS response
Drop RHBA and RPA response + ABTS response for both of them

Reviewed-by: Sesidhar Baddela <sebaddel@xxxxxxxxx>
Reviewed-by: Arulprabhu Ponnusamy <arulponn@xxxxxxxxx>
Reviewed-by: Gian Carlo Boffa <gcboffa@xxxxxxxxx>
Tested-by: Arun Easi <aeasi@xxxxxxxxx>
Co-developed-by: Arun Easi <aeasi@xxxxxxxxx>
Signed-off-by: Arun Easi <aeasi@xxxxxxxxx>
Tested-by: Karan Tilak Kumar <kartilak@xxxxxxxxx>
Signed-off-by: Karan Tilak Kumar <kartilak@xxxxxxxxx>
---
drivers/scsi/fnic/fdls_disc.c | 113 +++++++++++++++++++++++++---------
drivers/scsi/fnic/fnic_fdls.h | 1 +
2 files changed, 86 insertions(+), 28 deletions(-)

diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c
index c2b6f4eb338e..0ee1b74967b9 100644
--- a/drivers/scsi/fnic/fdls_disc.c
+++ b/drivers/scsi/fnic/fdls_disc.c
@@ -763,47 +763,69 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport)
iport->fabric.timer_pending = 1;
}

-static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
+static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport,
+ uint16_t oxid)
{
- uint8_t *frame;
+ struct fc_frame_header *pfdmi_abts;
uint8_t d_id[3];
+ uint8_t *frame;
struct fnic *fnic = iport->fnic;
- struct fc_frame_header *pfabric_abts;
- unsigned long fdmi_tov;
- uint16_t oxid;
- uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
- sizeof(struct fc_frame_header);

frame = fdls_alloc_frame(iport);
if (frame == NULL) {
FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num,
"Failed to allocate frame to send FDMI ABTS");
- return;
+ return NULL;
}

- pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
+ pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
fdls_init_fabric_abts_frame(frame, iport);

hton24(d_id, FC_FID_MGMT_SERV);
- FNIC_STD_SET_D_ID(*pfabric_abts, d_id);
+ FNIC_STD_SET_D_ID(*pfdmi_abts, d_id);
+ FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid);
+
+ return frame;
+}
+
+static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
+{
+ uint8_t *frame;
+ unsigned long fdmi_tov;
+ uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
+ sizeof(struct fc_frame_header);

if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) {
- oxid = iport->active_oxid_fdmi_plogi;
- FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+ frame = fdls_alloc_init_fdmi_abts_frame(iport,
+ iport->active_oxid_fdmi_plogi);
+ if (frame == NULL)
+ return;
+
fnic_send_fcoe_frame(iport, frame, frame_size);
} else {
if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {
- oxid = iport->active_oxid_fdmi_rhba;
- FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+ frame = fdls_alloc_init_fdmi_abts_frame(iport,
+ iport->active_oxid_fdmi_rhba);
+ if (frame == NULL)
+ return;
+
fnic_send_fcoe_frame(iport, frame, frame_size);
}
if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
- oxid = iport->active_oxid_fdmi_rpa;
- FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+ frame = fdls_alloc_init_fdmi_abts_frame(iport,
+ iport->active_oxid_fdmi_rpa);
+ if (frame == NULL) {
+ if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
+ goto arm_timer;
+ else
+ return;
+ }
+
fnic_send_fcoe_frame(iport, frame, frame_size);
}
}

+arm_timer:
fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov);
mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov));
iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING;
@@ -2244,6 +2266,21 @@ void fdls_fabric_timer_callback(struct timer_list *t)
spin_unlock_irqrestore(&fnic->fnic_lock, flags);
}

+void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport)
+{
+ struct fnic *fnic = iport->fnic;
+
+ iport->fabric.fdmi_pending = 0;
+ /* If max retries not exhausted, start over from fdmi plogi */
+ if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
+ iport->fabric.fdmi_retry++;
+ FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+ "Retry FDMI PLOGI. FDMI retry: %d",
+ iport->fabric.fdmi_retry);
+ fdls_send_fdmi_plogi(iport);
+ }
+}
+
void fdls_fdmi_timer_callback(struct timer_list *t)
{
struct fnic_fdls_fabric_s *fabric = from_timer(fabric, t, fdmi_timer);
@@ -2289,14 +2326,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);

- iport->fabric.fdmi_pending = 0;
- /* If max retries not exhaused, start over from fdmi plogi */
- if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
- iport->fabric.fdmi_retry++;
- FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
- "retry fdmi timer %d", iport->fabric.fdmi_retry);
- fdls_send_fdmi_plogi(iport);
- }
+ fdls_fdmi_retry_plogi(iport);
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
spin_unlock_irqrestore(&fnic->fnic_lock, flags);
@@ -3714,11 +3744,32 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
switch (FNIC_FRAME_TYPE(oxid)) {
case FNIC_FRAME_TYPE_FDMI_PLOGI:
fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi);
+
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING;
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
break;
case FNIC_FRAME_TYPE_FDMI_RHBA:
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING;
+
+ /* If RPA is still pending, don't turn off ABORT PENDING.
+ * We count on the timer to detect the ABTS timeout and take
+ * corrective action.
+ */
+ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING))
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
+
fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba);
break;
case FNIC_FRAME_TYPE_FDMI_RPA:
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING;
+
+ /* If RHBA is still pending, don't turn off ABORT PENDING.
+ * We count on the timer to detect the ABTS timeout and take
+ * corrective action.
+ */
+ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING))
+ iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
+
fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa);
break;
default:
@@ -3728,10 +3779,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
break;
}

- timer_delete_sync(&iport->fabric.fdmi_timer);
- iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
-
- fdls_send_fdmi_plogi(iport);
+ /*
+ * Only if ABORT PENDING is off, delete the timer, and if no other
+ * operations are pending, retry FDMI.
+ * Otherwise, let the timer pop and take the appropriate action.
+ */
+ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) {
+ timer_delete_sync(&iport->fabric.fdmi_timer);
+ if (!iport->fabric.fdmi_pending)
+ fdls_fdmi_retry_plogi(iport);
+ }
}

static void
diff --git a/drivers/scsi/fnic/fnic_fdls.h b/drivers/scsi/fnic/fnic_fdls.h
index 8e610b65ad57..531d0b37e450 100644
--- a/drivers/scsi/fnic/fnic_fdls.h
+++ b/drivers/scsi/fnic/fnic_fdls.h
@@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport,
bool fdls_delete_tport(struct fnic_iport_s *iport,
struct fnic_tport_s *tport);
void fdls_fdmi_timer_callback(struct timer_list *t);
+void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport);

/* fnic_fcs.c */
void fnic_fdls_init(struct fnic *fnic, int usefip);
--
2.47.1