[PATCH v5 4/6] nvme-rdma: short-circuit reconnect retries

From: Daniel Wagner
Date: Tue Apr 09 2024 - 05:36:09 EST


From: Hannes Reinecke <hare@xxxxxxx>

Returning an nvme status from nvme_rdma_setup_ctrl() indicates that the
association was established and we have received a status from the
controller; consequently we should honour the DNR bit. If not any future
reconnect attempts will just return the same error, so we can
short-circuit the reconnect attempts and fail the connection directly.

Reviewed-by: Chaitanya Kulkarni <kch@xxxxxxxxxx>
Signed-off-by: Hannes Reinecke <hare@xxxxxxx>
Signed-off-by: Daniel Wagner <dwagner@xxxxxxx>
---
drivers/nvme/host/rdma.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 366f0bb4ebfc..53d51df26ae1 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -982,9 +982,11 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
kfree(ctrl);
}

-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl,
+ int status)
{
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+ bool recon = nvme_ctrl_reconnect(status);

/* If we are resetting/deleting then do nothing */
if (state != NVME_CTRL_CONNECTING) {
@@ -992,12 +994,14 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
return;
}

- if (nvmf_should_reconnect(&ctrl->ctrl)) {
+ if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
ctrl->ctrl.opts->reconnect_delay);
queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
+ dev_info(ctrl->ctrl.device, "Removing controller (%d)...\n",
+ status);
nvme_delete_ctrl(&ctrl->ctrl);
}
}
@@ -1104,10 +1108,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_rdma_ctrl, reconnect_work);
+ int ret;

++ctrl->ctrl.nr_reconnects;

- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;

dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -1120,7 +1126,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.nr_reconnects);
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}

static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -1145,7 +1151,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
return;
}

- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, -ENOTCONN);
}

static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -2169,6 +2175,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl =
container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+ int ret;

nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -2179,14 +2186,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
return;
}

- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto out_fail;

return;

out_fail:
++ctrl->ctrl.nr_reconnects;
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}

static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
--
2.44.0