Re: [PATCH 22/32] KVM: s390: pci: provide routines for enabling/disabling IOAT assist

From: Matthew Rosato
Date: Tue Dec 14 2021 - 13:13:25 EST


On 12/14/21 12:46 PM, Pierre Morel wrote:


On 12/7/21 21:57, Matthew Rosato wrote:
These routines will be wired into the vfio_pci_zdev ioctl handlers to
respond to requests to enable / disable a device for PCI I/O Address
Translation assistance.

Signed-off-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
---
  arch/s390/include/asm/kvm_pci.h |  15 ++++
  arch/s390/include/asm/pci_dma.h |   2 +
  arch/s390/kvm/pci.c             | 133 ++++++++++++++++++++++++++++++++
  arch/s390/kvm/pci.h             |   2 +
  4 files changed, 152 insertions(+)

diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index 54a0afdbe7d0..254275399f21 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -16,11 +16,21 @@
  #include <linux/kvm_host.h>
  #include <linux/kvm.h>
  #include <linux/pci.h>
+#include <linux/mutex.h>
  #include <asm/pci_insn.h>
+#include <asm/pci_dma.h>
+
+struct kvm_zdev_ioat {
+    unsigned long *head[ZPCI_TABLE_PAGES];
+    unsigned long **seg;
+    unsigned long ***pt;
+    struct mutex lock;
+};
  struct kvm_zdev {
      struct zpci_dev *zdev;
      struct kvm *kvm;
+    struct kvm_zdev_ioat ioat;
      struct zpci_fib fib;
  };
@@ -33,6 +43,11 @@ extern int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
                     bool assist);
  extern int kvm_s390_pci_aif_disable(struct zpci_dev *zdev);
+extern int kvm_s390_pci_ioat_probe(struct zpci_dev *zdev);
+extern int kvm_s390_pci_ioat_enable(struct zpci_dev *zdev, u64 iota);
+extern int kvm_s390_pci_ioat_disable(struct zpci_dev *zdev);
+extern u8 kvm_s390_pci_get_dtsm(struct zpci_dev *zdev);
+
  extern int kvm_s390_pci_interp_probe(struct zpci_dev *zdev);
  extern int kvm_s390_pci_interp_enable(struct zpci_dev *zdev);
  extern int kvm_s390_pci_interp_disable(struct zpci_dev *zdev);
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 3b8e89d4578a..e1d3c1d3fc8a 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -50,6 +50,8 @@ enum zpci_ioat_dtype {
  #define ZPCI_TABLE_ALIGN        ZPCI_TABLE_SIZE
  #define ZPCI_TABLE_ENTRY_SIZE        (sizeof(unsigned long))
  #define ZPCI_TABLE_ENTRIES        (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_TABLE_PAGES        (ZPCI_TABLE_SIZE >> PAGE_SHIFT)
+#define ZPCI_TABLE_ENTRIES_PAGES    (ZPCI_TABLE_ENTRIES * ZPCI_TABLE_PAGES)
  #define ZPCI_TABLE_BITS            11
  #define ZPCI_PT_BITS            8
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 3a29398dd53b..a1c0c0881332 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -12,6 +12,7 @@
  #include <asm/kvm_pci.h>
  #include <asm/pci.h>
  #include <asm/pci_insn.h>
+#include <asm/pci_dma.h>
  #include <asm/sclp.h>
  #include "pci.h"
  #include "kvm-s390.h"
@@ -315,6 +316,131 @@ int kvm_s390_pci_aif_disable(struct zpci_dev *zdev)
  }
  EXPORT_SYMBOL_GPL(kvm_s390_pci_aif_disable);
+int kvm_s390_pci_ioat_probe(struct zpci_dev *zdev)
+{
+    return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_ioat_probe);
+
+int kvm_s390_pci_ioat_enable(struct zpci_dev *zdev, u64 iota)
+{
+    gpa_t gpa = (gpa_t)(iota & ZPCI_RTE_ADDR_MASK);
+    struct kvm_zdev_ioat *ioat;
+    struct page *page;
+    struct kvm *kvm;
+    unsigned int idx;
+    void *iaddr;
+    int i, rc = 0;
+
+    if (!zdev->kzdev || !zdev->kzdev->kvm || zdev->kzdev->ioat.head[0])
+        return -EINVAL;

The only caller already checked zdev->kzdev.

I tend to get overzealous with these checks..

Could we use a macro to replace zdev->kzdev->ioat.head[0]?
Something like:
#define shadow_pgtbl_initialized zdev->kzdev->ioat.head[0]
That would be clearer to me.

Sure


+
+    /* Ensure supported type specified */
+    if ((iota & ZPCI_IOTA_RTTO_FLAG) != ZPCI_IOTA_RTTO_FLAG)
+        return -EINVAL;
+
+    kvm = zdev->kzdev->kvm;
+    ioat = &zdev->kzdev->ioat;
+    mutex_lock(&ioat->lock);
+    idx = srcu_read_lock(&kvm->srcu);
+    for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+        page = gfn_to_page(kvm, gpa_to_gfn(gpa));

In relation to your question below about where things are being pinned...

Here the call to gfn_to_page() does the pin (this call eventually drives hva_to_pfn() for the pinning).

+        if (is_error_page(page)) {
+            srcu_read_unlock(&kvm->srcu, idx);
+            rc = -EIO;
+            goto out;
+        }
+        iaddr = page_to_virt(page) + (gpa & ~PAGE_MASK);
+        ioat->head[i] = (unsigned long *)iaddr;

^^ here we store what was pinned above in ioat->head[] and can use it later for unpinning.

But looking at it again now, I think that in the is_error_page() case above I should also be jumping to the unpin: label to clean up, in case we were somewhere in the middle of the loop and so already have some pages pinned.

+        gpa += PAGE_SIZE;
+    }
+    srcu_read_unlock(&kvm->srcu, idx);
+
+    zdev->kzdev->ioat.seg = kcalloc(ZPCI_TABLE_ENTRIES_PAGES,
+                    sizeof(unsigned long *), GFP_KERNEL);
+    if (!zdev->kzdev->ioat.seg)
+        goto unpin;
+    zdev->kzdev->ioat.pt = kcalloc(ZPCI_TABLE_ENTRIES,
+                       sizeof(unsigned long **), GFP_KERNEL);
+    if (!zdev->kzdev->ioat.pt)
+        goto free_seg;
+
+out:
+    mutex_unlock(&ioat->lock);
+    return rc;
+
+free_seg:
+    kfree(zdev->kzdev->ioat.seg);
+unpin:
+    for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+        kvm_release_pfn_dirty((u64)ioat->head[i] >> PAGE_SHIFT);

I did not find where the pages are pinned.

See above.


+        ioat->head[i] = 0;
+    }
+    mutex_unlock(&ioat->lock);
+    return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_ioat_enable);
+

...snip...