[PATCH v5 12/12] virt: geniezone: Add memory pin/unpin support

From: Yi-De Wu
Date: Thu Jul 27 2023 - 04:04:30 EST


From: "Jerry Wang" <ze-yu.wang@xxxxxxxxxxxx>

Implement memory pin/unpin.
- use rb_tree to store pinned memory page
- pin page when handling page fault
- unpin page when handling relinquish or destroy

Signed-off-by: Jerry Wang <ze-yu.wang@xxxxxxxxxxxx>
Signed-off-by: Liju Chen <liju-clr.chen@xxxxxxxxxxxx>
Signed-off-by: Yi-De Wu <yi-de.wu@xxxxxxxxxxxx>
---
drivers/virt/geniezone/Makefile | 3 +-
drivers/virt/geniezone/gzvm_exception.c | 32 ----
drivers/virt/geniezone/gzvm_hvc.c | 34 ++++
drivers/virt/geniezone/gzvm_mmu.c | 210 ++++++++++++++++++++++++
drivers/virt/geniezone/gzvm_vcpu.c | 4 +-
drivers/virt/geniezone/gzvm_vm.c | 113 +++----------
include/linux/gzvm_drv.h | 19 ++-
include/uapi/linux/gzvm.h | 5 +
8 files changed, 291 insertions(+), 129 deletions(-)
create mode 100644 drivers/virt/geniezone/gzvm_hvc.c
create mode 100644 drivers/virt/geniezone/gzvm_mmu.c

diff --git a/drivers/virt/geniezone/Makefile b/drivers/virt/geniezone/Makefile
index e1299f99df76..ca4b84630573 100644
--- a/drivers/virt/geniezone/Makefile
+++ b/drivers/virt/geniezone/Makefile
@@ -8,4 +8,5 @@ GZVM_DIR ?= ../../../drivers/virt/geniezone

gzvm-y := $(GZVM_DIR)/gzvm_main.o $(GZVM_DIR)/gzvm_vm.o \
$(GZVM_DIR)/gzvm_vcpu.o $(GZVM_DIR)/gzvm_irqfd.o \
- $(GZVM_DIR)/gzvm_ioeventfd.o $(GZVM_DIR)/gzvm_exception.o
+ $(GZVM_DIR)/gzvm_ioeventfd.o $(GZVM_DIR)/gzvm_exception.o \
+ $(GZVM_DIR)/gzvm_hvc.o $(GZVM_DIR)/gzvm_mmu.o
diff --git a/drivers/virt/geniezone/gzvm_exception.c b/drivers/virt/geniezone/gzvm_exception.c
index c2cab1472d2f..8c413815369b 100644
--- a/drivers/virt/geniezone/gzvm_exception.c
+++ b/drivers/virt/geniezone/gzvm_exception.c
@@ -6,38 +6,6 @@
#include <linux/device.h>
#include <linux/gzvm_drv.h>

-/**
- * gzvm_handle_page_fault() - Handle guest page fault, find corresponding page
- * for the faulting gpa
- * @vcpu: Pointer to struct gzvm_vcpu_run of the faulting vcpu
- *
- * Return:
- * * 0 - Success to handle guest page fault
- * * -EFAULT - Failed to map phys addr to guest's GPA
- */
-static int gzvm_handle_page_fault(struct gzvm_vcpu *vcpu)
-{
- struct gzvm *vm = vcpu->gzvm;
- int memslot_id;
- u64 pfn, gfn;
- int ret;
-
- gfn = PHYS_PFN(vcpu->run->exception.fault_gpa);
- memslot_id = gzvm_find_memslot(vm, gfn);
- if (unlikely(memslot_id < 0))
- return -EFAULT;
-
- ret = gzvm_gfn_to_pfn_memslot(&vm->memslot[memslot_id], gfn, &pfn);
- if (unlikely(ret))
- return -EFAULT;
-
- ret = gzvm_arch_map_guest(vm->vm_id, memslot_id, pfn, gfn, 1);
- if (unlikely(ret))
- return -EFAULT;
-
- return 0;
-}
-
/**
* gzvm_handle_guest_exception() - Handle guest exception
* @vcpu: Pointer to struct gzvm_vcpu_run in userspace
diff --git a/drivers/virt/geniezone/gzvm_hvc.c b/drivers/virt/geniezone/gzvm_hvc.c
new file mode 100644
index 000000000000..d8854eeec04e
--- /dev/null
+++ b/drivers/virt/geniezone/gzvm_hvc.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 MediaTek Inc.
+ */
+
+#include <linux/gzvm_drv.h>
+
+/**
+ * gzvm_handle_guest_hvc() - Handle guest hvc
+ * @vcpu: Pointer to struct gzvm_vcpu_run in userspace
+ * Return:
+ * * true - This hvc has been processed, no need to back to VMM.
+ * * false - This hvc has not been processed, require userspace.
+ */
+bool gzvm_handle_guest_hvc(struct gzvm_vcpu *vcpu)
+{
+ int ret;
+ unsigned long ipa;
+
+ switch (vcpu->run->hypercall.args[0]) {
+ case GZVM_HVC_MEM_RELINQUISH:
+ ipa = vcpu->run->hypercall.args[1];
+ ret = gzvm_handle_relinquish(vcpu, ipa);
+ break;
+ default:
+ ret = false;
+ break;
+ }
+
+ if (!ret)
+ return true;
+ else
+ return false;
+}
diff --git a/drivers/virt/geniezone/gzvm_mmu.c b/drivers/virt/geniezone/gzvm_mmu.c
new file mode 100644
index 000000000000..4c2c7da7f929
--- /dev/null
+++ b/drivers/virt/geniezone/gzvm_mmu.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 MediaTek Inc.
+ */
+
+#include <linux/gzvm_drv.h>
+
+/**
+ * hva_to_pa_fast() - converts hva to pa in generic fast way
+ * @hva: Host virtual address.
+ *
+ * Return: 0 if translation error
+ */
+u64 hva_to_pa_fast(u64 hva)
+{
+ struct page *page[1];
+
+ u64 pfn;
+
+ if (get_user_page_fast_only(hva, 0, page)) {
+ pfn = page_to_phys(page[0]);
+ put_page((struct page *)page);
+ return pfn;
+ } else {
+ return 0;
+ }
+}
+
+/**
+ * hva_to_pa_slow() - note that this function may sleep
+ * @hva: Host virtual address.
+ *
+ * Return: 0 if translation error
+ */
+u64 hva_to_pa_slow(u64 hva)
+{
+ struct page *page = NULL;
+ u64 pfn = 0;
+ int npages;
+
+ npages = get_user_pages_unlocked(hva, 1, &page, 0);
+ if (npages != 1)
+ return 0;
+
+ if (page) {
+ pfn = page_to_phys(page);
+ put_page(page);
+ }
+
+ return pfn;
+}
+
+static u64 __gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn)
+{
+ u64 hva, pa;
+
+ hva = gzvm_gfn_to_hva_memslot(memslot, gfn);
+
+ pa = gzvm_hva_to_pa_arch(hva);
+ if (pa != 0)
+ return PHYS_PFN(pa);
+
+ pa = hva_to_pa_fast(hva);
+ if (pa)
+ return PHYS_PFN(pa);
+
+ pa = hva_to_pa_slow(hva);
+ if (pa)
+ return PHYS_PFN(pa);
+
+ return 0;
+}
+
+/**
+ * gzvm_gfn_to_pfn_memslot() - Translate gfn (guest ipa) to pfn (host pa),
+ * result is in @pfn
+ * @memslot: Pointer to struct gzvm_memslot.
+ * @gfn: Guest frame number.
+ * @pfn: Host page frame number.
+ *
+ * Return:
+ * * 0 - Succeed
+ * * -EFAULT - Failed to convert
+ */
+int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, u64 *pfn)
+{
+ u64 __pfn;
+
+ if (!memslot)
+ return -EFAULT;
+
+ __pfn = __gzvm_gfn_to_pfn_memslot(memslot, gfn);
+ if (__pfn == 0) {
+ *pfn = 0;
+ return -EFAULT;
+ }
+
+ *pfn = __pfn;
+
+ return 0;
+}
+
+static int cmp_ppages(struct rb_node *node, const struct rb_node *parent)
+{
+ struct gzvm_pinned_page *a = container_of(node, struct gzvm_pinned_page, node);
+ struct gzvm_pinned_page *b = container_of(parent, struct gzvm_pinned_page, node);
+
+ if (a->ipa < b->ipa)
+ return -1;
+ if (a->ipa > b->ipa)
+ return 1;
+ return 0;
+}
+
+static int rb_ppage_cmp(const void *key, const struct rb_node *node)
+{
+ struct gzvm_pinned_page *p = container_of(node, struct gzvm_pinned_page, node);
+ phys_addr_t ipa = (phys_addr_t)key;
+
+ return (ipa < p->ipa) ? -1 : (ipa > p->ipa);
+}
+
+static int gzvm_insert_ppage(struct gzvm *vm, struct gzvm_pinned_page *ppage)
+{
+ if (rb_find_add(&ppage->node, &vm->pinned_pages, cmp_ppages))
+ return -EEXIST;
+ return 0;
+}
+
+static int pin_one_page(unsigned long hva, struct page **page)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
+
+ mmap_read_lock(mm);
+ pin_user_pages(hva, 1, flags, page);
+ mmap_read_unlock(mm);
+
+ return 0;
+}
+
+/**
+ * gzvm_handle_relinquish() - Handle memory relinquish request from hypervisor
+ *
+ * @vcpu: Pointer to struct gzvm_vcpu_run in userspace
+ * @ipa: Start address(gpa) of a reclaimed page
+ */
+int gzvm_handle_relinquish(struct gzvm_vcpu *vcpu, phys_addr_t ipa)
+{
+ struct gzvm_pinned_page *ppage;
+ struct rb_node *node;
+ struct gzvm *vm = vcpu->gzvm;
+
+ node = rb_find((void *)ipa, &vm->pinned_pages, rb_ppage_cmp);
+
+ if (node)
+ rb_erase(node, &vm->pinned_pages);
+ else
+ return 0;
+
+ ppage = container_of(node, struct gzvm_pinned_page, node);
+ unpin_user_pages_dirty_lock(&ppage->page, 1, true);
+ kfree(ppage);
+ return 0;
+}
+
+/**
+ * gzvm_handle_page_fault() - Handle guest page fault, find corresponding page
+ * for the faulting gpa
+ * @vcpu: Pointer to struct gzvm_vcpu_run in userspace
+ */
+int gzvm_handle_page_fault(struct gzvm_vcpu *vcpu)
+{
+ struct gzvm *vm = vcpu->gzvm;
+ int memslot_id;
+ u64 pfn, gfn;
+ unsigned long hva;
+ struct gzvm_pinned_page *ppage = NULL;
+ struct page *page = NULL;
+ int ret;
+
+ gfn = PHYS_PFN(vcpu->run->exception.fault_gpa);
+ memslot_id = gzvm_find_memslot(vm, gfn);
+ if (unlikely(memslot_id < 0))
+ return -EFAULT;
+
+ ret = gzvm_gfn_to_pfn_memslot(&vm->memslot[memslot_id], gfn, &pfn);
+ if (unlikely(ret))
+ return -EFAULT;
+
+ ret = gzvm_arch_map_guest(vm->vm_id, memslot_id, pfn, gfn, 1);
+ if (unlikely(ret))
+ return -EFAULT;
+
+ hva = gzvm_gfn_to_hva_memslot(&vm->memslot[memslot_id], gfn);
+ pin_one_page(hva, &page);
+
+ if (!page)
+ return -EFAULT;
+
+ ppage = kmalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT);
+ if (!ppage)
+ return -ENOMEM;
+
+ ppage->page = page;
+ ppage->ipa = vcpu->run->exception.fault_gpa;
+ gzvm_insert_ppage(vm, ppage);
+
+ return 0;
+}
diff --git a/drivers/virt/geniezone/gzvm_vcpu.c b/drivers/virt/geniezone/gzvm_vcpu.c
index 794b24da40e2..964e18ff0d2a 100644
--- a/drivers/virt/geniezone/gzvm_vcpu.c
+++ b/drivers/virt/geniezone/gzvm_vcpu.c
@@ -117,7 +117,9 @@ static long gzvm_vcpu_run(struct gzvm_vcpu *vcpu, void * __user argp)
need_userspace = true;
break;
case GZVM_EXIT_HYPERCALL:
- fallthrough;
+ if (!gzvm_handle_guest_hvc(vcpu))
+ need_userspace = true;
+ break;
case GZVM_EXIT_DEBUG:
fallthrough;
case GZVM_EXIT_FAIL_ENTRY:
diff --git a/drivers/virt/geniezone/gzvm_vm.c b/drivers/virt/geniezone/gzvm_vm.c
index 3da5fdc141b6..6fae6bf976c8 100644
--- a/drivers/virt/geniezone/gzvm_vm.c
+++ b/drivers/virt/geniezone/gzvm_vm.c
@@ -16,106 +16,13 @@
static DEFINE_MUTEX(gzvm_list_lock);
static LIST_HEAD(gzvm_list);

-/**
- * hva_to_pa_fast() - converts hva to pa in generic fast way
- * @hva: Host virtual address.
- *
- * Return: 0 if translation error
- */
-static u64 hva_to_pa_fast(u64 hva)
-{
- struct page *page[1];
-
- u64 pfn;
-
- if (get_user_page_fast_only(hva, 0, page)) {
- pfn = page_to_phys(page[0]);
- put_page((struct page *)page);
- return pfn;
- } else {
- return 0;
- }
-}
-
-/**
- * hva_to_pa_slow() - note that this function may sleep
- * @hva: Host virtual address.
- *
- * Return: 0 if translation error
- */
-static u64 hva_to_pa_slow(u64 hva)
-{
- struct page *page;
- int npages;
- u64 pfn;
-
- npages = get_user_pages_unlocked(hva, 1, &page, 0);
- if (npages != 1)
- return 0;
-
- pfn = page_to_phys(page);
- put_page(page);
-
- return pfn;
-}
-
-static u64 gzvm_gfn_to_hva_memslot(struct gzvm_memslot *memslot, u64 gfn)
+u64 gzvm_gfn_to_hva_memslot(struct gzvm_memslot *memslot, u64 gfn)
{
u64 offset = gfn - memslot->base_gfn;

return memslot->userspace_addr + offset * PAGE_SIZE;
}

-static u64 __gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn)
-{
- u64 hva, pa;
-
- hva = gzvm_gfn_to_hva_memslot(memslot, gfn);
-
- pa = gzvm_hva_to_pa_arch(hva);
- if (pa != 0)
- return PHYS_PFN(pa);
-
- pa = hva_to_pa_fast(hva);
- if (pa)
- return PHYS_PFN(pa);
-
- pa = hva_to_pa_slow(hva);
- if (pa)
- return PHYS_PFN(pa);
-
- return 0;
-}
-
-/**
- * gzvm_gfn_to_pfn_memslot() - Translate gfn (guest ipa) to pfn (host pa),
- * result is in @pfn
- * @memslot: Pointer to struct gzvm_memslot.
- * @gfn: Guest frame number.
- * @pfn: Host page frame number.
- *
- * Return:
- * * 0 - Succeed
- * * -EFAULT - Failed to convert
- */
-int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, u64 *pfn)
-{
- u64 __pfn;
-
- if (!memslot)
- return -EFAULT;
-
- __pfn = __gzvm_gfn_to_pfn_memslot(memslot, gfn);
- if (__pfn == 0) {
- *pfn = 0;
- return -EFAULT;
- }
-
- *pfn = __pfn;
-
- return 0;
-}
-
/**
* gzvm_find_memslot() - Find memslot containing this @gpa
* @vm: Pointer to struct gzvm
@@ -454,6 +361,21 @@ static long gzvm_vm_ioctl(struct file *filp, unsigned int ioctl,
return ret;
}

+static void gzvm_destroy_ppage(struct gzvm *gzvm)
+{
+ struct gzvm_pinned_page *ppage;
+ struct rb_node *node;
+
+ node = rb_first(&gzvm->pinned_pages);
+ while (node) {
+ ppage = rb_entry(node, struct gzvm_pinned_page, node);
+ unpin_user_pages_dirty_lock(&ppage->page, 1, true);
+ node = rb_next(node);
+ rb_erase(&ppage->node, &gzvm->pinned_pages);
+ kfree(ppage);
+ }
+}
+
static void gzvm_destroy_vm(struct gzvm *gzvm)
{
pr_debug("VM-%u is going to be destroyed\n", gzvm->vm_id);
@@ -470,6 +392,8 @@ static void gzvm_destroy_vm(struct gzvm *gzvm)

mutex_unlock(&gzvm->lock);

+ gzvm_destroy_ppage(gzvm);
+
kfree(gzvm);
}

@@ -505,6 +429,7 @@ static struct gzvm *gzvm_create_vm(unsigned long vm_type)
gzvm->vm_id = ret;
gzvm->mm = current->mm;
mutex_init(&gzvm->lock);
+ gzvm->pinned_pages = RB_ROOT;

ret = gzvm_vm_irqfd_init(gzvm);
if (ret) {
diff --git a/include/linux/gzvm_drv.h b/include/linux/gzvm_drv.h
index d7838679c700..a0bc3b4244cc 100644
--- a/include/linux/gzvm_drv.h
+++ b/include/linux/gzvm_drv.h
@@ -12,6 +12,8 @@
#include <linux/mutex.h>
#include <linux/gzvm.h>
#include <linux/srcu.h>
+#include <linux/rbtree.h>
+#include <linux/mm.h>

#define GZVM_VCPU_MMAP_SIZE PAGE_SIZE
#define INVALID_VM_ID 0xffff
@@ -76,6 +78,12 @@ struct gzvm_vcpu {
struct hrtimer gzvm_mtimer;
};

+struct gzvm_pinned_page {
+ struct rb_node node;
+ struct page *page;
+ u64 ipa;
+};
+
struct gzvm {
struct gzvm_vcpu *vcpus[GZVM_MAX_VCPUS];
/* userspace tied to this vm */
@@ -102,6 +110,9 @@ struct gzvm {
struct srcu_struct irq_srcu;
/* lock for irq injection */
struct mutex irq_lock;
+
+ /* Use rb-tree to record pin/unpin page */
+ struct rb_root pinned_pages;
};

long gzvm_dev_ioctl_check_extension(struct gzvm *gzvm, unsigned long args);
@@ -137,9 +148,15 @@ int gzvm_arch_inform_exit(u16 vm_id);
int gzvm_arch_drv_init(void);
void gzvm_arch_drv_exit(void);

-int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, u64 *pfn);
int gzvm_find_memslot(struct gzvm *vm, u64 gpa);
+int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, u64 *pfn);
+u64 gzvm_gfn_to_hva_memslot(struct gzvm_memslot *memslot, u64 gfn);
+u64 hva_to_pa_fast(u64 hva);
+u64 hva_to_pa_slow(u64 hva);
+int gzvm_handle_page_fault(struct gzvm_vcpu *vcpu);
+int gzvm_handle_relinquish(struct gzvm_vcpu *vcpu, phys_addr_t ipa);
bool gzvm_handle_guest_exception(struct gzvm_vcpu *vcpu);
+bool gzvm_handle_guest_hvc(struct gzvm_vcpu *vcpu);

int gzvm_arch_create_device(u16 vm_id, struct gzvm_create_device *gzvm_dev);
int gzvm_arch_inject_irq(struct gzvm *gzvm, unsigned int vcpu_idx,
diff --git a/include/uapi/linux/gzvm.h b/include/uapi/linux/gzvm.h
index a3329b713089..74b959994c4a 100644
--- a/include/uapi/linux/gzvm.h
+++ b/include/uapi/linux/gzvm.h
@@ -156,6 +156,11 @@ enum {
GZVM_EXCEPTION_PAGE_FAULT = 0x1,
};

+/* hypercall definitions of GZVM_EXIT_HYPERCALL */
+enum {
+ GZVM_HVC_MEM_RELINQUISH = 0xc6000009,
+};
+
/**
* struct gzvm_vcpu_run: Same purpose as kvm_run, this struct is
* shared between userspace, kernel and
--
2.18.0