[PATCH AUTOSEL 4.18 021/131] scsi: lpfc: Fix driver crash when re-registering NVME rports.

From: Sasha Levin
Date: Sun Sep 02 2018 - 09:03:49 EST


From: James Smart <jsmart2021@xxxxxxxxx>

[ Upstream commit 93a3922da428ec0752e8b2ab00c42dadbbf805a9 ]

During remote port loss fault testing, the driver crashed with the
following trace:

general protection fault: 0000 [#1] SMP
RIP: ... lpfc_nvme_register_port+0x250/0x480 [lpfc]
Call Trace:
lpfc_nlp_state_cleanup+0x1b3/0x7a0 [lpfc]
lpfc_nlp_set_state+0xa6/0x1d0 [lpfc]
lpfc_cmpl_prli_prli_issue+0x213/0x440
lpfc_disc_state_machine+0x7e/0x1e0 [lpfc]
lpfc_cmpl_els_prli+0x18a/0x200 [lpfc]
lpfc_sli_sp_handle_rspiocb+0x3b5/0x6f0 [lpfc]
lpfc_sli_handle_slow_ring_event_s4+0x161/0x240 [lpfc]
lpfc_work_done+0x948/0x14c0 [lpfc]
lpfc_do_work+0x16f/0x180 [lpfc]
kthread+0xc9/0xe0
ret_from_fork+0x55/0x80

After registering a new remoteport, the driver is pulling an ndlp pointer
from the lpfc rport associated with the private area of a newly registered
remoteport. The private area is uninitialized, so it's garbage.

Correct by pulling the the lpfc rport pointer from the entering ndlp point,
then ndlp value from at rport. Note the entering ndlp may be replacing by
the rport->ndlp due to an address change swap.

Signed-off-by: Dick Kennedy <dick.kennedy@xxxxxxxxxxxx>
Signed-off-by: James Smart <james.smart@xxxxxxxxxxxx>
Signed-off-by: Martin K. Petersen <martin.petersen@xxxxxxxxxx>
Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx>
---
drivers/scsi/lpfc/lpfc_nvme.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 76a5a99605aa..d723fd1d7b26 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2687,7 +2687,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
struct lpfc_nvme_rport *oldrport;
struct nvme_fc_remote_port *remote_port;
struct nvme_fc_port_info rpinfo;
- struct lpfc_nodelist *prev_ndlp;
+ struct lpfc_nodelist *prev_ndlp = NULL;

lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NVME_DISC,
"6006 Register NVME PORT. DID x%06x nlptype x%x\n",
@@ -2736,23 +2736,29 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
spin_unlock_irq(&vport->phba->hbalock);
rport = remote_port->private;
if (oldrport) {
+ /* New remoteport record does not guarantee valid
+ * host private memory area.
+ */
+ prev_ndlp = oldrport->ndlp;
if (oldrport == remote_port->private) {
- /* Same remoteport. Just reuse. */
+ /* Same remoteport - ndlp should match.
+ * Just reuse.
+ */
lpfc_printf_vlog(ndlp->vport, KERN_INFO,
LOG_NVME_DISC,
"6014 Rebinding lport to "
"remoteport %p wwpn 0x%llx, "
- "Data: x%x x%x %p x%x x%06x\n",
+ "Data: x%x x%x %p %p x%x x%06x\n",
remote_port,
remote_port->port_name,
remote_port->port_id,
remote_port->port_role,
+ prev_ndlp,
ndlp,
ndlp->nlp_type,
ndlp->nlp_DID);
return 0;
}
- prev_ndlp = rport->ndlp;

/* Sever the ndlp<->rport association
* before dropping the ndlp ref from
@@ -2786,13 +2792,13 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
lpfc_printf_vlog(vport, KERN_INFO,
LOG_NVME_DISC | LOG_NODE,
"6022 Binding new rport to "
- "lport %p Remoteport %p WWNN 0x%llx, "
+ "lport %p Remoteport %p rport %p WWNN 0x%llx, "
"Rport WWPN 0x%llx DID "
- "x%06x Role x%x, ndlp %p\n",
- lport, remote_port,
+ "x%06x Role x%x, ndlp %p prev_ndlp %p\n",
+ lport, remote_port, rport,
rpinfo.node_name, rpinfo.port_name,
rpinfo.port_id, rpinfo.port_role,
- ndlp);
+ ndlp, prev_ndlp);
} else {
lpfc_printf_vlog(vport, KERN_ERR,
LOG_NVME_DISC | LOG_NODE,
--
2.17.1