[PATCH 4.14 066/156] nvme-rdma: don't complete requests before a send work request has completed

From: Greg Kroah-Hartman
Date: Fri Feb 02 2018 - 12:16:21 EST


4.14-stable review patch. If anyone has any objections, please let me know.

------------------

From: Sagi Grimberg <sagi@xxxxxxxxxxx>


[ Upstream commit 4af7f7ff92a42b6c713293c99e7982bcfcf51a70 ]

In order to guarantee that the HCA will never get an access violation
(either from an invalidated rkey or from the iommu) when retrying a send
operation, we must complete a request only when both the send completion
and the nvme cqe have arrived. We need to set the send/recv completion
flags atomically because more than a single context might be accessing the
request concurrently (one is the cq irq-poll context and the other is
user polling used in IOCB_HIPRI).

Only then is it safe to invalidate the rkey (if needed), unmap the host
buffers, and complete the I/O.
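
For reference, the "last completion finishes the request" pattern the patch
introduces can be sketched outside the kernel roughly as follows. This is
only an illustrative userspace model using C11 atomics in place of
refcount_t; the names (fake_request, send_done, recv_done,
fake_end_request) are invented for the example and are not the driver's
real symbols.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for struct nvme_rdma_request. The real driver
 * uses refcount_t; C11 atomics model the same idea here. */
struct fake_request {
	atomic_int ref;		/* starts at 2: one ref for send, one for recv */
	int status;		/* filled in by the recv (CQE) path */
	bool completed;
};

static void fake_end_request(struct fake_request *req)
{
	/* Runs exactly once, after both completions have been observed. */
	req->completed = true;
	printf("request completed, status %d\n", req->status);
}

/* Send-completion path: only drops its reference. */
static void send_done(struct fake_request *req)
{
	if (atomic_fetch_sub(&req->ref, 1) == 1)
		fake_end_request(req);
}

/* Recv-completion path: records the result, then drops its reference. */
static void recv_done(struct fake_request *req, int status)
{
	req->status = status;
	if (atomic_fetch_sub(&req->ref, 1) == 1)
		fake_end_request(req);
}

int main(void)
{
	struct fake_request req = { .ref = 2 };

	/* The two completions may arrive in either order; the request is
	 * only finished after the second one decrements the count to zero. */
	recv_done(&req, 0);
	send_done(&req);
	return 0;
}

Whichever completion path decrements the counter to zero is the one that
finishes the request, so the rkey is never invalidated and the host buffers
are never unmapped while the send work request may still be in flight.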

Signed-off-by: Sagi Grimberg <sagi@xxxxxxxxxxx>
Reviewed-by: Max Gurtovoy <maxg@xxxxxxxxxxxx>
Signed-off-by: Christoph Hellwig <hch@xxxxxx>
Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
drivers/nvme/host/rdma.c | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)

--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -67,6 +67,9 @@ struct nvme_rdma_request {
struct nvme_request req;
struct ib_mr *mr;
struct nvme_rdma_qe sqe;
+ union nvme_result result;
+ __le16 status;
+ refcount_t ref;
struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
u32 num_sge;
int nents;
@@ -1177,6 +1180,7 @@ static int nvme_rdma_map_data(struct nvm
req->num_sge = 1;
req->inline_data = false;
req->mr->need_inval = false;
+ refcount_set(&req->ref, 2); /* send and recv completions */

c->common.flags |= NVME_CMD_SGL_METABUF;

@@ -1213,8 +1217,19 @@ static int nvme_rdma_map_data(struct nvm

static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
- if (unlikely(wc->status != IB_WC_SUCCESS))
+ struct nvme_rdma_qe *qe =
+ container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
+ struct nvme_rdma_request *req =
+ container_of(qe, struct nvme_rdma_request, sqe);
+ struct request *rq = blk_mq_rq_from_pdu(req);
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
nvme_rdma_wr_error(cq, wc, "SEND");
+ return;
+ }
+
+ if (refcount_dec_and_test(&req->ref))
+ nvme_end_request(rq, req->status, req->result);
}

/*
@@ -1359,14 +1374,19 @@ static int nvme_rdma_process_nvme_rsp(st
}
req = blk_mq_rq_to_pdu(rq);

- if (rq->tag == tag)
- ret = 1;
+ req->status = cqe->status;
+ req->result = cqe->result;

if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
wc->ex.invalidate_rkey == req->mr->rkey)
req->mr->need_inval = false;

- nvme_end_request(rq, cqe->status, cqe->result);
+ if (refcount_dec_and_test(&req->ref)) {
+ if (rq->tag == tag)
+ ret = 1;
+ nvme_end_request(rq, req->status, req->result);
+ }
+
return ret;
}