+static int nvdimm_flush_iova(struct rxe_mr *mr, u64 iova, int length)
+{
+ int err;
+ int bytes;
+ u8 *va;
+ struct rxe_map **map;
+ struct rxe_phys_buf *buf;
+ int m;
+ int i;
+ size_t offset;
+
+ if (length == 0)
+ return 0;
The length is only relevant when the flush type is "Memory Region
Range".
When the flush type is "Memory Region", the entire region must be
flushed successfully before completing the operation.
Yes, currently, the length has been expanded to the MR's length in such case.
Indeed, I didn't confidence about this, the main logic comes from rxe_mr_copy()+
+ if (mr->type == IB_MR_TYPE_DMA) {
+ arch_wb_cache_pmem((void *)iova, length);
+ return 0;
+ }
Are dmamr's supported for remote access? I thought that was
prevented on first principles now. I might suggest not allowing
them to be flushed in any event. There is no length restriction,
and it's a VERY costly operation. At a minimum, protect this
closely.
Thanks for the suggestion.
+
+ WARN_ON_ONCE(!mr->cur_map_set);
The WARN is rather pointless because the code will crash just
seven lines below.
+
+ err = mr_check_range(mr, iova, length);
+ if (err) {
+ err = -EFAULT;
+ goto err1;
+ }
+
+ lookup_iova(mr, iova, &m, &i, &offset);
+
+ map = mr->cur_map_set->map + m;
+ buf = map[0]->buf + i;
+
+ while (length > 0) {
+ va = (u8 *)(uintptr_t)buf->addr + offset;
+ bytes = buf->size - offset;
+
+ if (bytes > length)
+ bytes = length;
+
+ arch_wb_cache_pmem(va, bytes);
+
+ length -= bytes;
+
+ offset = 0;
+ buf++;
+ i++;
+
+ if (i == RXE_BUF_PER_MAP) {
+ i = 0;
+ map++;
+ buf = map[0]->buf;
+ }
+ }
+
+ return 0;
+
+err1:
+ return err;
+}
+
+static enum resp_states process_flush(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ u64 length = 0, start = qp->resp.va;
+ u32 sel = feth_sel(pkt);
+ u32 plt = feth_plt(pkt);
+ struct rxe_mr *mr = qp->resp.mr;
+
+ if (sel == IB_EXT_SEL_MR_RANGE)
+ length = qp->resp.length;
+ else if (sel == IB_EXT_SEL_MR_WHOLE)
+ length = mr->cur_map_set->length;
I'm going to have to think about these
Yes, you've made me realize that we should also consider adjusting the start of the iova to the MR's start in that case.
My bad — it never occurred to me. o(╯□╰)o
+ if (plt == IB_EXT_PLT_PERSIST) {
+ nvdimm_flush_iova(mr, start, length);
+ wmb(); // clwb follows by a sfence
+ } else if (plt == IB_EXT_PLT_GLB_VIS)
+ wmb(); // sfence is enough
The persistence and global visibility bits are not mutually
exclusive, and in fact persistence does not imply global
visibility on some platforms.
If so, and per the spec, why not:
if (plt & IB_EXT_PLT_PERSIST)
	do_somethingA();
if (plt & IB_EXT_PLT_GLB_VIS)
	do_somethingB();
Second, the "clwb" and "sfence" comments are completely
Intel-specific.
Good catch.
What processing will be done on other processor platforms?
I haven't dug into other architectures yet, only Intel.
In this function, I think calling the higher-level wrappers, such as wmb() and
arch_wb_cache_pmem(), should be enough — right?