[PATCH RFC v1 9/9] KVM: SVM: Pin SEV pages in MMU during sev_launch_update_data()

From: Nikunj A Dadhania
Date: Mon Mar 07 2022 - 23:41:30 EST


From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>

Pin the memory for the data being passed to launch_update_data()
because it gets encrypted before the guest is first run and must
not be moved which would corrupt it.

Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
[ * Use kvm_for_each_memslot_in_hva_range() to find slot and iterate
* Updated sev_pin_memory_in_mmu() error handling.
* As pinning/unpining pages is handled within MMU, removed
{get,put}_user(). ]
Signed-off-by: Nikunj A Dadhania <nikunj@xxxxxxx>
---
arch/x86/kvm/svm/sev.c | 146 +++++++++++++++++++++++++++++++++++++----
1 file changed, 134 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 7e39320fc65d..1c371268934b 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -22,6 +22,7 @@
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>

+#include "mmu.h"
#include "x86.h"
#include "svm.h"
#include "svm_ops.h"
@@ -428,9 +429,93 @@ static void *sev_alloc_pages(struct kvm_sev_info *sev, unsigned long uaddr,
return pages;
}

+#define SEV_PFERR_RO (PFERR_USER_MASK)
+#define SEV_PFERR_RW (PFERR_WRITE_MASK | PFERR_USER_MASK)
+
+static struct page **sev_pin_memory_in_mmu(struct kvm *kvm, unsigned long addr,
+ unsigned long size,
+ unsigned long *npages)
+{
+ unsigned long hva_start, hva_end, uaddr, end, slot_start, slot_end;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct interval_tree_node *node;
+ struct kvm_memory_slot *slot;
+ struct kvm_memslots *slots;
+ int idx, ret = 0, i = 0;
+ struct kvm_vcpu *vcpu;
+ struct page **pages;
+ kvm_pfn_t pfn;
+ u32 err_code;
+ gfn_t gfn;
+
+ pages = sev_alloc_pages(sev, addr, size, npages);
+ if (IS_ERR(pages))
+ return pages;
+
+ vcpu = kvm_get_vcpu(kvm, 0);
+ if (mutex_lock_killable(&vcpu->mutex)) {
+ kvfree(pages);
+ return ERR_PTR(-EINTR);
+ }
+
+ vcpu_load(vcpu);
+ idx = srcu_read_lock(&kvm->srcu);
+
+ kvm_mmu_load(vcpu);
+
+ end = addr + (*npages << PAGE_SHIFT);
+ slots = kvm_memslots(kvm);
+
+ kvm_for_each_memslot_in_hva_range(node, slots, addr, end) {
+ slot = container_of(node, struct kvm_memory_slot,
+ hva_node[slots->node_idx]);
+ slot_start = slot->userspace_addr;
+ slot_end = slot_start + (slot->npages << PAGE_SHIFT);
+ hva_start = max(addr, slot_start);
+ hva_end = min(end, slot_end);
+
+ err_code = (slot->flags & KVM_MEM_READONLY) ?
+ SEV_PFERR_RO : SEV_PFERR_RW;
+
+ for (uaddr = hva_start; uaddr < hva_end; uaddr += PAGE_SIZE) {
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+
+ if (need_resched())
+ cond_resched();
+
+ /*
+ * Fault in the page and sev_pin_page() will handle the
+ * pinning
+ */
+ gfn = hva_to_gfn_memslot(uaddr, slot);
+ pfn = kvm_mmu_map_tdp_page(vcpu, gfn_to_gpa(gfn),
+ err_code, PG_LEVEL_4K);
+ if (is_error_noslot_pfn(pfn)) {
+ ret = -EFAULT;
+ break;
+ }
+ pages[i++] = pfn_to_page(pfn);
+ }
+ }
+
+ kvm_mmu_unload(vcpu);
+ srcu_read_unlock(&kvm->srcu, idx);
+ vcpu_put(vcpu);
+ mutex_unlock(&vcpu->mutex);
+
+ if (!ret)
+ return pages;
+
+ kvfree(pages);
+ return ERR_PTR(ret);
+}
+
static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
unsigned long ulen, unsigned long *n,
- int write)
+ int write, bool mmu_usable)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct pinned_region *region;
@@ -441,6 +526,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,

lockdep_assert_held(&kvm->lock);

+ /* Use MMU based pinning if possible. */
+ if (mmu_usable)
+ return sev_pin_memory_in_mmu(kvm, uaddr, ulen, n);
+
pages = sev_alloc_pages(sev, uaddr, ulen, &npages);
if (IS_ERR(pages))
return pages;
@@ -558,6 +647,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ bool mmu_usable = atomic_read(&kvm->online_vcpus) > 0;
struct kvm_sev_launch_update_data params;
struct sev_data_launch_update_data data;
struct page **inpages;
@@ -574,15 +664,18 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
vaddr_end = vaddr + size;

/* Lock the user memory. */
- inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
+ inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1, mmu_usable);
if (IS_ERR(inpages))
return PTR_ERR(inpages);

/*
* Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
* place; the cache may contain the data that was written unencrypted.
+ * Flushing is automatically handled if the pages can be pinned in the
+ * MMU.
*/
- sev_clflush_pages(inpages, npages);
+ if (!mmu_usable)
+ sev_clflush_pages(inpages, npages);

data.reserved = 0;
data.handle = sev->handle;
@@ -617,9 +710,14 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
set_page_dirty_lock(inpages[i]);
mark_page_accessed(inpages[i]);
}
- /* unlock the user pages on error */
+ /*
+ * unlock the user pages on error, else pages will be unpinned either
+ * during memslot free path or vm destroy path
+ */
if (ret)
sev_unpin_memory(kvm, inpages, npages);
+ else if (mmu_usable)
+ kvfree(inpages);
return ret;
}

@@ -1001,11 +1099,11 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
int len, s_off, d_off;

/* lock userspace source and destination page */
- src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
+ src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0, false);
if (IS_ERR(src_p))
return PTR_ERR(src_p);

- dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
+ dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1, false);
if (IS_ERR(dst_p)) {
sev_unpin_memory(kvm, src_p, n);
return PTR_ERR(dst_p);
@@ -1057,6 +1155,7 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)

static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
+ bool mmu_usable = atomic_read(&kvm->online_vcpus) > 0;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_secret data;
struct kvm_sev_launch_secret params;
@@ -1071,15 +1170,18 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
return -EFAULT;

- pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
+ pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1, mmu_usable);
if (IS_ERR(pages))
return PTR_ERR(pages);

/*
* Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
* place; the cache may contain the data that was written unencrypted.
+ * Flushing is automatically handled if the pages can be pinned in the
+ * MMU.
*/
- sev_clflush_pages(pages, n);
+ if (!mmu_usable)
+ sev_clflush_pages(pages, n);

/*
* The secret must be copied into contiguous memory region, lets verify
@@ -1126,8 +1228,15 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
set_page_dirty_lock(pages[i]);
mark_page_accessed(pages[i]);
}
+ /*
+ * unlock the user pages on error, else pages will be unpinned either
+ * during memslot free path or vm destroy path
+ */
if (ret)
sev_unpin_memory(kvm, pages, n);
+ else if (mmu_usable)
+ kvfree(pages);
+
return ret;
}

@@ -1358,7 +1467,7 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)

/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
- PAGE_SIZE, &n, 0);
+ PAGE_SIZE, &n, 0, false);
if (IS_ERR(guest_page))
return PTR_ERR(guest_page);

@@ -1406,6 +1515,10 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
e_free_hdr:
kfree(hdr);
e_unpin:
+ /*
+ * unlock the user pages on error, else pages will be unpinned either
+ * during memslot free path or vm destroy path
+ */
if (ret)
sev_unpin_memory(kvm, guest_page, n);

@@ -1512,6 +1625,7 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)

static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
+ bool mmu_usable = atomic_read(&kvm->online_vcpus) > 0;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_receive_update_data params;
struct sev_data_receive_update_data data;
@@ -1555,7 +1669,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)

/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
- PAGE_SIZE, &n, 1);
+ PAGE_SIZE, &n, 1, mmu_usable);
if (IS_ERR(guest_page)) {
ret = PTR_ERR(guest_page);
goto e_free_trans;
@@ -1564,9 +1678,11 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/*
* Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP
* encrypts the written data with the guest's key, and the cache may
- * contain dirty, unencrypted data.
+ * contain dirty, unencrypted data. Flushing is automatically handled if
+ * the pages can be pinned in the MMU.
*/
- sev_clflush_pages(guest_page, n);
+ if (!mmu_usable)
+ sev_clflush_pages(guest_page, n);

/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
@@ -1577,8 +1693,14 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
&argp->error);

+ /*
+ * unlock the user pages on error, else pages will be unpinned either
+ * during memslot free path or vm destroy path
+ */
if (ret)
sev_unpin_memory(kvm, guest_page, n);
+ else if (mmu_usable)
+ kvfree(guest_page);

e_free_trans:
kfree(trans);
--
2.32.0