Re: [RFC PATCH 6/6] KVM: SVM: Pin SEV pages in MMU during sev_launch_update_data()

From: Nikunj A. Dadhania
Date: Thu Jan 20 2022 - 23:08:41 EST


On 1/20/2022 9:47 PM, Peter Gonda wrote:
> On Tue, Jan 18, 2022 at 4:07 AM Nikunj A Dadhania <nikunj@xxxxxxx> wrote:
>>
>> From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
>>
>> Pin the memory for the data being passed to launch_update_data()
>> because it gets encrypted before the guest is first run and must
>> not be moved, which would corrupt it.
>>
>> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
>> [ * Changed hva_to_gva() to take an extra argument and return gpa_t.
>> * Updated sev_pin_memory_in_mmu() error handling.
>> * As pinning/unpinning pages is handled within the MMU, removed
>> {get,put}_user(). ]
>> Signed-off-by: Nikunj A Dadhania <nikunj@xxxxxxx>
>> ---
>> arch/x86/kvm/svm/sev.c | 122 ++++++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 119 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
>> index 14aeccfc500b..1ae714e83a3c 100644
>> --- a/arch/x86/kvm/svm/sev.c
>> +++ b/arch/x86/kvm/svm/sev.c
>> @@ -22,6 +22,7 @@
>> #include <asm/trapnr.h>
>> #include <asm/fpu/xcr.h>
>>
>> +#include "mmu.h"
>> #include "x86.h"
>> #include "svm.h"
>> #include "svm_ops.h"
>> @@ -490,6 +491,110 @@ static unsigned long get_num_contig_pages(unsigned long idx,
>> return pages;
>> }
>>
>> +#define SEV_PFERR_RO (PFERR_USER_MASK)
>> +#define SEV_PFERR_RW (PFERR_WRITE_MASK | PFERR_USER_MASK)
>> +
>> +static struct kvm_memory_slot *hva_to_memslot(struct kvm *kvm,
>> + unsigned long hva)
>> +{
>> + struct kvm_memslots *slots = kvm_memslots(kvm);
>> + struct kvm_memory_slot *memslot;
>> + int bkt;
>> +
>> + kvm_for_each_memslot(memslot, bkt, slots) {
>> + if (hva >= memslot->userspace_addr &&
>> + hva < memslot->userspace_addr +
>> + (memslot->npages << PAGE_SHIFT))
>> + return memslot;
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> +static gpa_t hva_to_gpa(struct kvm *kvm, unsigned long hva, bool *ro)
>> +{
>> + struct kvm_memory_slot *memslot;
>> + gpa_t gpa_offset;
>> +
>> + memslot = hva_to_memslot(kvm, hva);
>> + if (!memslot)
>> + return UNMAPPED_GVA;
>> +
>> + *ro = !!(memslot->flags & KVM_MEM_READONLY);
>> + gpa_offset = hva - memslot->userspace_addr;
>> + return ((memslot->base_gfn << PAGE_SHIFT) + gpa_offset);
>> +}
>> +
>> +static struct page **sev_pin_memory_in_mmu(struct kvm *kvm, unsigned long addr,
>> + unsigned long size,
>> + unsigned long *npages)
>> +{
>> + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
>> + struct kvm_vcpu *vcpu;
>> + struct page **pages;
>> + unsigned long i;
>> + u32 error_code;
>> + kvm_pfn_t pfn;
>> + int idx, ret = 0;
>> + gpa_t gpa;
>> + bool ro;
>> +
>> + pages = sev_alloc_pages(sev, addr, size, npages);
>> + if (IS_ERR(pages))
>> + return pages;
>> +
>> + vcpu = kvm_get_vcpu(kvm, 0);
>> + if (mutex_lock_killable(&vcpu->mutex)) {
>> + kvfree(pages);
>> + return ERR_PTR(-EINTR);
>> + }
>> +
>> + vcpu_load(vcpu);
>> + idx = srcu_read_lock(&kvm->srcu);
>> +
>> + kvm_mmu_load(vcpu);
>> +
>> + for (i = 0; i < *npages; i++, addr += PAGE_SIZE) {
>> + if (signal_pending(current)) {
>> + ret = -ERESTARTSYS;
>> + break;
>> + }
>> +
>> + if (need_resched())
>> + cond_resched();
>> +
>> + gpa = hva_to_gpa(kvm, addr, &ro);
>> + if (gpa == UNMAPPED_GVA) {
>> + ret = -EFAULT;
>> + break;
>> + }
>> +
>> + error_code = ro ? SEV_PFERR_RO : SEV_PFERR_RW;
>> +
>> + /*
>> + * Fault in the page and sev_pin_page() will handle the
>> + * pinning
>> + */
>> + pfn = kvm_mmu_map_tdp_page(vcpu, gpa, error_code, PG_LEVEL_4K);
>> + if (is_error_noslot_pfn(pfn)) {
>> + ret = -EFAULT;
>> + break;
>> + }
>> + pages[i] = pfn_to_page(pfn);
>> + }
>> +
>> + kvm_mmu_unload(vcpu);
>> + srcu_read_unlock(&kvm->srcu, idx);
>> + vcpu_put(vcpu);
>> + mutex_unlock(&vcpu->mutex);
>> +
>> + if (!ret)
>> + return pages;
>> +
>> + kvfree(pages);
>> + return ERR_PTR(ret);
>> +}
>> +
>> static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
>> {
>> unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
>> @@ -510,15 +615,21 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
>> vaddr_end = vaddr + size;
>>
>> /* Lock the user memory. */
>> - inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
>> + if (atomic_read(&kvm->online_vcpus))
>> + inpages = sev_pin_memory_in_mmu(kvm, vaddr, size, &npages);
>
> IIUC we can only use sev_pin_memory_in_mmu() when there is an
> online vCPU, because that means the MMU has been set up enough to use?
> Can we add a variable and a comment to help explain that?
>
> bool mmu_usable = atomic_read(&kvm->online_vcpus) > 0;

Sure, will add the comment and the variable.
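
Something along these lines (just a sketch; the exact wording and placement
in sev_launch_update_data() may differ):

	/*
	 * Pinning through the MMU is only possible once the MMU is usable,
	 * i.e. once at least one vCPU has been created.
	 */
	bool mmu_usable = atomic_read(&kvm->online_vcpus) > 0;

	/* Lock the user memory. */
	if (mmu_usable)
		inpages = sev_pin_memory_in_mmu(kvm, vaddr, size, &npages);
	else
		inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);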

>
>> + else
>> + inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
>
> So I am confused about this case. Since svm_register_enc_region() is
> now a NOOP, how can a user ensure that memory remains pinned from
> sev_launch_update_data() to when the memory would be demand pinned?
>
> Before, users could call svm_register_enc_region(), which pins the region,
> then sev_launch_update_data(), and then the VM could run and the data from
> sev_launch_update_data() would never have moved. I don't think that
> same guarantee is held here?

Yes, you are right. One way is to error out of this call if the MMU is not set up.
The other would require maintaining a list of all memory pinned via sev_pin_memory()
and unpinning it in the destroy path.
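
For the first option, something as simple as the below might do (untested
sketch, -EINVAL picked arbitrarily):

	/*
	 * Reject LAUNCH_UPDATE_DATA until at least one vCPU exists so that
	 * the pages can always be pinned through the MMU and stay pinned
	 * until the guest is destroyed.
	 */
	if (!atomic_read(&kvm->online_vcpus))
		return -EINVAL;

	inpages = sev_pin_memory_in_mmu(kvm, vaddr, size, &npages);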

>> if (IS_ERR(inpages))
>> return PTR_ERR(inpages);
>>
>> /*
>> * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
>> * place; the cache may contain the data that was written unencrypted.
>> + * Flushing is automatically handled if the pages can be pinned in the
>> + * MMU.
>> */
>> - sev_clflush_pages(inpages, npages);
>> + if (!atomic_read(&kvm->online_vcpus))
>> + sev_clflush_pages(inpages, npages);
>>
>> data.reserved = 0;
>> data.handle = sev->handle;
>> @@ -553,8 +664,13 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
>> set_page_dirty_lock(inpages[i]);
>> mark_page_accessed(inpages[i]);
>> }
>> +
>> /* unlock the user pages */
>> - sev_unpin_memory(kvm, inpages, npages);
>> + if (atomic_read(&kvm->online_vcpus))
>> + kvfree(inpages);

>> + else
>> + sev_unpin_memory(kvm, inpages, npages);

And, in that case, not unpin here.
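
Roughly, for the destroy-path option (the "pinned_regions" list, the struct
and the new field on kvm_sev_info below are made up for illustration, error
handling omitted):

	/* Illustrative only: remember what was pinned with sev_pin_memory(). */
	struct sev_pinned_region {
		struct list_head list;
		struct page **pages;
		unsigned long npages;
	};

	/* In sev_launch_update_data(), instead of sev_unpin_memory(): */
	region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
	region->pages = inpages;
	region->npages = npages;
	list_add_tail(&region->list, &sev->pinned_regions);

	/* In sev_vm_destroy(): */
	list_for_each_entry_safe(region, tmp, &sev->pinned_regions, list) {
		sev_unpin_memory(kvm, region->pages, region->npages);
		list_del(&region->list);
		kfree(region);
	}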

Regards
Nikunj