Re: [RFC PATCH 6/6] KVM: SVM: Pin SEV pages in MMU during sev_launch_update_data()

From: Peter Gonda
Date: Thu Jan 20 2022 - 11:17:36 EST


On Tue, Jan 18, 2022 at 4:07 AM Nikunj A Dadhania <nikunj@xxxxxxx> wrote:
>
> From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
>
> Pin the memory for the data being passed to launch_update_data()
> because it gets encrypted before the guest is first run and must
> not be moved afterwards, as that would corrupt it.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> [ * Changed hva_to_gpa() to take an extra argument and return gpa_t.
> * Updated sev_pin_memory_in_mmu() error handling.
> * As pinning/unpinning pages is handled within the MMU, removed
> {get,put}_user(). ]
> Signed-off-by: Nikunj A Dadhania <nikunj@xxxxxxx>
> ---
> arch/x86/kvm/svm/sev.c | 122 ++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 119 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 14aeccfc500b..1ae714e83a3c 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -22,6 +22,7 @@
> #include <asm/trapnr.h>
> #include <asm/fpu/xcr.h>
>
> +#include "mmu.h"
> #include "x86.h"
> #include "svm.h"
> #include "svm_ops.h"
> @@ -490,6 +491,110 @@ static unsigned long get_num_contig_pages(unsigned long idx,
>         return pages;
> }
>
> +#define SEV_PFERR_RO (PFERR_USER_MASK)
> +#define SEV_PFERR_RW (PFERR_WRITE_MASK | PFERR_USER_MASK)
> +
> +static struct kvm_memory_slot *hva_to_memslot(struct kvm *kvm,
> +                                              unsigned long hva)
> +{
> +        struct kvm_memslots *slots = kvm_memslots(kvm);
> +        struct kvm_memory_slot *memslot;
> +        int bkt;
> +
> +        kvm_for_each_memslot(memslot, bkt, slots) {
> +                if (hva >= memslot->userspace_addr &&
> +                    hva < memslot->userspace_addr +
> +                    (memslot->npages << PAGE_SHIFT))
> +                        return memslot;
> +        }
> +
> +        return NULL;
> +}
> +
> +static gpa_t hva_to_gpa(struct kvm *kvm, unsigned long hva, bool *ro)
> +{
> +        struct kvm_memory_slot *memslot;
> +        gpa_t gpa_offset;
> +
> +        memslot = hva_to_memslot(kvm, hva);
> +        if (!memslot)
> +                return UNMAPPED_GVA;
> +
> +        *ro = !!(memslot->flags & KVM_MEM_READONLY);
> +        gpa_offset = hva - memslot->userspace_addr;
> +        return ((memslot->base_gfn << PAGE_SHIFT) + gpa_offset);
> +}
> +
> +static struct page **sev_pin_memory_in_mmu(struct kvm *kvm, unsigned long addr,
> +                                           unsigned long size,
> +                                           unsigned long *npages)
> +{
> +        struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +        struct kvm_vcpu *vcpu;
> +        struct page **pages;
> +        unsigned long i;
> +        u32 error_code;
> +        kvm_pfn_t pfn;
> +        int idx, ret = 0;
> +        gpa_t gpa;
> +        bool ro;
> +
> +        pages = sev_alloc_pages(sev, addr, size, npages);
> +        if (IS_ERR(pages))
> +                return pages;
> +
> +        vcpu = kvm_get_vcpu(kvm, 0);
> +        if (mutex_lock_killable(&vcpu->mutex)) {
> +                kvfree(pages);
> +                return ERR_PTR(-EINTR);
> +        }
> +
> +        vcpu_load(vcpu);
> +        idx = srcu_read_lock(&kvm->srcu);
> +
> +        kvm_mmu_load(vcpu);
> +
> +        for (i = 0; i < *npages; i++, addr += PAGE_SIZE) {
> +                if (signal_pending(current)) {
> +                        ret = -ERESTARTSYS;
> +                        break;
> +                }
> +
> +                if (need_resched())
> +                        cond_resched();
> +
> +                gpa = hva_to_gpa(kvm, addr, &ro);
> +                if (gpa == UNMAPPED_GVA) {
> +                        ret = -EFAULT;
> +                        break;
> +                }
> +
> +                error_code = ro ? SEV_PFERR_RO : SEV_PFERR_RW;
> +
> +                /*
> +                 * Fault in the page and sev_pin_page() will handle the
> +                 * pinning
> +                 */
> +                pfn = kvm_mmu_map_tdp_page(vcpu, gpa, error_code, PG_LEVEL_4K);
> +                if (is_error_noslot_pfn(pfn)) {
> +                        ret = -EFAULT;
> +                        break;
> +                }
> +                pages[i] = pfn_to_page(pfn);
> +        }
> +
> +        kvm_mmu_unload(vcpu);
> +        srcu_read_unlock(&kvm->srcu, idx);
> +        vcpu_put(vcpu);
> +        mutex_unlock(&vcpu->mutex);
> +
> +        if (!ret)
> +                return pages;
> +
> +        kvfree(pages);
> +        return ERR_PTR(ret);
> +}
> +
> static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
> {
>         unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
> @@ -510,15 +615,21 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
>         vaddr_end = vaddr + size;
>
>         /* Lock the user memory. */
> -        inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
> +        if (atomic_read(&kvm->online_vcpus))
> +                inpages = sev_pin_memory_in_mmu(kvm, vaddr, size, &npages);

IIUC we can only use sev_pin_memory_in_mmu() when there is an online
vCPU, because that means the MMU has been set up enough to use? Can we
add a variable and a comment to help explain that? E.g.:

        bool mmu_usable = atomic_read(&kvm->online_vcpus) > 0;
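
Then the later online_vcpus checks in sev_launch_update_data() could
all key off of that, something like (untested sketch, comment wording
is mine):

        /* Lock the user memory. */
        if (mmu_usable)
                inpages = sev_pin_memory_in_mmu(kvm, vaddr, size, &npages);
        else
                inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
        ...
        /* The MMU path flushes as it pins, only flush the legacy path. */
        if (!mmu_usable)
                sev_clflush_pages(inpages, npages);
        ...
        /* unlock the user pages */
        if (mmu_usable)
                kvfree(inpages);
        else
                sev_unpin_memory(kvm, inpages, npages);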

> +        else
> +                inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);

So I am confused about this case. Since svm_register_enc_region() is
now a no-op, how can a user ensure that memory remains pinned from
sev_launch_update_data() until the memory would be demand-pinned?

Before, users could call svm_register_enc_region(), which pins the
region, then sev_launch_update_data(), and then the VM could run and
the data from sev_launch_update_data() would never have moved. I don't
think that same guarantee holds here?
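
For reference, the ordering that used to provide that guarantee, as I
understand it (userspace side, roughly; vm_fd, buf, len and the
LAUNCH_UPDATE_DATA sev_cmd setup are placeholders):

        struct kvm_enc_region region = {
                .addr = (__u64)buf,
                .size = len,
        };

        /* Region is pinned from here on, so the data cannot migrate... */
        ioctl(vm_fd, KVM_MEMORY_ENCRYPT_REG_REGION, &region);

        /* ...while LAUNCH_UPDATE_DATA encrypts it in place... */
        ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &sev_cmd);

        /* ...and it stays pinned until the region is unregistered. */
        ioctl(vm_fd, KVM_MEMORY_ENCRYPT_UNREG_REGION, &region);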

>         if (IS_ERR(inpages))
>                 return PTR_ERR(inpages);
>
>         /*
>          * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
>          * place; the cache may contain the data that was written unencrypted.
> +         * Flushing is automatically handled if the pages can be pinned in the
> +         * MMU.
>          */
> -        sev_clflush_pages(inpages, npages);
> +        if (!atomic_read(&kvm->online_vcpus))
> +                sev_clflush_pages(inpages, npages);
>
>         data.reserved = 0;
>         data.handle = sev->handle;
> @@ -553,8 +664,13 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
>                 set_page_dirty_lock(inpages[i]);
>                 mark_page_accessed(inpages[i]);
>         }
> +
>         /* unlock the user pages */
> -        sev_unpin_memory(kvm, inpages, npages);
> +        if (atomic_read(&kvm->online_vcpus))
> +                kvfree(inpages);
> +        else
> +                sev_unpin_memory(kvm, inpages, npages);
> +
>         return ret;
> }
>
> --
> 2.32.0
>