[PATCH RFC v1 1/9] KVM: Introduce pinning flag to hva_to_pfn*

From: Nikunj A Dadhania
Date: Mon Mar 07 2022 - 23:40:19 EST


KVM takes references on guest pages with get_user_pages_* (which uses
FOLL_GET). For long-term pinning of guest pages, pin_user_pages_*
(which uses FOLL_PIN) needs to be used instead. Add a flag to
hva_to_pfn* so that the pages are pinned, rather than merely
referenced, when the memslot represents encrypted memory.
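
In essence, the two reference schemes pair up as in the sketch below.
This is an illustrative condensation of the hunks that follow, not code
from this patch: the helper names get_guest_page()/put_guest_page() are
made up for the example, and the fast path, FOLL_NOWAIT/FOLL_HWPOISON
handling and error cases are left out.

  /*
   * Simplified view (not part of this patch): encrypted memslots take
   * long-term FOLL_PIN references, everything else keeps the existing
   * FOLL_GET references. Condensed from hva_to_pfn_slow() and
   * kvm_release_pfn_clean() below; helper names are hypothetical.
   */
  static int get_guest_page(unsigned long addr, unsigned int flags,
                            struct page **page, bool use_pin)
  {
          if (use_pin)
                  /* FOLL_LONGTERM is only accepted by the *_fast variants */
                  return pin_user_pages_fast(addr, 1, flags | FOLL_LONGTERM, page);

          return get_user_pages_unlocked(addr, 1, page, flags);
  }

  static void put_guest_page(struct page *page, bool use_pin)
  {
          if (use_pin)
                  unpin_user_page(page);  /* pairs with pin_user_pages_*() */
          else
                  put_page(page);         /* pairs with get_user_pages_*() */
  }

The release side has to match the acquire side, which is why
kvm_release_pfn_clean() below learns to distinguish pinned pages from
ordinary references.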

Suggested-by: David Hildenbrand <david@xxxxxxxxxx>
Signed-off-by: Nikunj A Dadhania <nikunj@xxxxxxx>
---
 include/linux/kvm_host.h |  6 ++++
 virt/kvm/kvm_main.c      | 63 ++++++++++++++++++++++++++++++----------
 virt/kvm/kvm_mm.h        |  2 +-
 virt/kvm/pfncache.c      |  2 +-
 4 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f11039944c08..c23022960d51 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -55,6 +55,7 @@
* include/linux/kvm_h.
*/
#define KVM_MEMSLOT_INVALID (1UL << 16)
+#define KVM_MEMSLOT_ENCRYPTED (1UL << 17)

/*
* Bit 63 of the memslot generation number is an "update in-progress flag",
@@ -583,6 +584,11 @@ static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *mem
return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
}

+static inline bool memslot_is_encrypted(const struct kvm_memory_slot *slot)
+{
+ return slot && (slot->flags & KVM_MEMSLOT_ENCRYPTED);
+}
+
#ifndef KVM_DIRTY_LOG_MANUAL_CAPS
#define KVM_DIRTY_LOG_MANUAL_CAPS KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE
#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0afc016cc54d..c035fe6b39ec 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2381,9 +2381,10 @@ static inline int check_user_page_hwpoison(unsigned long addr)
* only part that runs if we can in atomic context.
*/
static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
- bool *writable, kvm_pfn_t *pfn)
+ bool *writable, kvm_pfn_t *pfn, bool use_pin)
{
struct page *page[1];
+ bool ret;

/*
* Fast pin a writable pfn only if it is a write fault request
@@ -2393,7 +2394,12 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
if (!(write_fault || writable))
return false;

- if (get_user_page_fast_only(addr, FOLL_WRITE, page)) {
+ if (!use_pin)
+ ret = get_user_page_fast_only(addr, FOLL_WRITE, page);
+ else
+ ret = pin_user_pages_fast_only(addr, 1, FOLL_WRITE | FOLL_LONGTERM, page);
+
+ if (ret) {
*pfn = page_to_pfn(page[0]);

if (writable)
@@ -2409,9 +2415,9 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
* 1 indicates success, -errno is returned if error is detected.
*/
static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
- bool *writable, kvm_pfn_t *pfn)
+ bool *writable, kvm_pfn_t *pfn, bool use_pin)
{
- unsigned int flags = FOLL_HWPOISON;
+ unsigned int flags = 0;
struct page *page;
int npages = 0;

@@ -2422,20 +2428,41 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,

if (write_fault)
flags |= FOLL_WRITE;
- if (async)
- flags |= FOLL_NOWAIT;

- npages = get_user_pages_unlocked(addr, 1, &page, flags);
+ if (!use_pin) {
+ flags |= FOLL_HWPOISON;
+ if (async)
+ flags |= FOLL_NOWAIT;
+
+ npages = get_user_pages_unlocked(addr, 1, &page, flags);
+ } else {
+ /*
+ * FOLL_LONGTERM is not supported in pin_user_pages_unlocked,
+ * use *_fast instead.
+ */
+ flags |= FOLL_LONGTERM;
+ npages = pin_user_pages_fast(addr, 1, flags, &page);
+ }
+
if (npages != 1)
return npages;

/* map read fault as writable if possible */
if (unlikely(!write_fault) && writable) {
struct page *wpage;
+ bool ret;
+
+ if (!use_pin)
+ ret = get_user_page_fast_only(addr, FOLL_WRITE, &wpage);
+ else
+ ret = pin_user_pages_fast_only(addr, 1, FOLL_WRITE | FOLL_LONGTERM, &wpage);

- if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) {
+ if (ret) {
*writable = true;
- put_page(page);
+ if (!use_pin)
+ put_page(page);
+ else
+ unpin_user_page(page);
page = wpage;
}
}
@@ -2541,7 +2568,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
* whether the mapping is writable.
*/
kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
- bool write_fault, bool *writable)
+ bool write_fault, bool *writable, bool use_pin)
{
struct vm_area_struct *vma;
kvm_pfn_t pfn = 0;
@@ -2550,13 +2577,13 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
/* we can do it either atomically or asynchronously, not both */
BUG_ON(atomic && async);

- if (hva_to_pfn_fast(addr, write_fault, writable, &pfn))
+ if (hva_to_pfn_fast(addr, write_fault, writable, &pfn, use_pin))
return pfn;

if (atomic)
return KVM_PFN_ERR_FAULT;

- npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
+ npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn, use_pin);
if (npages == 1)
return pfn;

@@ -2616,7 +2643,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
}

return hva_to_pfn(addr, atomic, async, write_fault,
- writable);
+ writable, memslot_is_encrypted(slot));
}
EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);

@@ -2788,8 +2815,14 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);

void kvm_release_pfn_clean(kvm_pfn_t pfn)
{
- if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
- put_page(pfn_to_page(pfn));
+ if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn)) {
+ struct page *page = pfn_to_page(pfn);
+
+ if (page_maybe_dma_pinned(page))
+ unpin_user_page(page);
+ else
+ put_page(page);
+ }
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);

diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 34ca40823260..b1a5e379949b 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -25,7 +25,7 @@
#endif /* KVM_HAVE_MMU_RWLOCK */

kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
- bool write_fault, bool *writable);
+ bool write_fault, bool *writable, bool use_pin);

#ifdef CONFIG_HAVE_KVM_PFNCACHE
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index ce878f4be4da..44384f06c81b 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -135,7 +135,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
smp_rmb();

/* We always request a writeable mapping */
- new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
+ new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL, false);
if (is_error_noslot_pfn(new_pfn))
break;

--
2.32.0