[PATCH RFC v1 4/9] KVM: SVM: Add pinning metadata in the arch memslot

From: Nikunj A Dadhania
Date: Mon Mar 07 2022 - 23:40:41 EST


AMD SEV guests require their pages to be pinned in host physical
memory. The memory encryption scheme uses the physical address of
the memory being encrypted. If guest pages are moved, the decrypted
content would be incorrect, corrupting the guest's memory.

For SEV/SEV-ES guests, the hypervisor doesn't know which pages are
encrypted or when the guest is done using those pages. The hypervisor
should therefore treat all guest pages as encrypted until they are
deallocated or the guest is destroyed.

The KVM MMU needs to track which pages are pinned, along with their
corresponding pfns, so that they can be unpinned in the guest destroy
and deallocation paths.

Signed-off-by: Nikunj A Dadhania <nikunj@xxxxxxx>
---
arch/x86/include/asm/kvm-x86-ops.h | 2 ++
arch/x86/include/asm/kvm_host.h | 7 +++++
arch/x86/kvm/svm/sev.c | 49 ++++++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.c | 3 ++
arch/x86/kvm/svm/svm.h | 6 ++++
arch/x86/kvm/x86.c | 11 ++++++-
6 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 8efb43d92eef..61ff8a636db6 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -89,6 +89,8 @@ KVM_X86_OP(set_identity_map_addr)
KVM_X86_OP(get_mt_mask)
KVM_X86_OP(load_mmu_pgd)
KVM_X86_OP(pin_pfn)
+KVM_X86_OP(alloc_memslot_metadata)
+KVM_X86_OP(free_memslot)
KVM_X86_OP_NULL(has_wbinvd_exit)
KVM_X86_OP(get_l2_tsc_offset)
KVM_X86_OP(get_l2_tsc_multiplier)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index df11f1fb76de..eeb2c799b59f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -926,6 +926,8 @@ struct kvm_arch_memory_slot {
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+ unsigned long *pinned_bitmap;
+ kvm_pfn_t *pfns;
};

/*
@@ -1421,6 +1423,11 @@ struct kvm_x86_ops {
bool (*pin_pfn)(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
kvm_pfn_t pfn, hva_t hva, bool write,
enum pg_level level);
+ int (*alloc_memslot_metadata)(struct kvm *kvm,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new);
+ void (*free_memslot)(struct kvm *kvm,
+ struct kvm_memory_slot *slot);

bool (*has_wbinvd_exit)(void);

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 17b53457d866..bd7572517c99 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2950,3 +2950,52 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)

ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
}
+
+/*
+ * Free the SEV pin-tracking arrays of @slot; does nothing for a non-SEV VM.
+ */
+void sev_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	struct kvm_arch_memory_slot *aslot = &slot->arch;
+
+	if (!sev_guest(kvm))
+		return;
+
+	/* kvfree() accepts NULL; clear the pointers so a repeat call is harmless. */
+	kvfree(aslot->pinned_bitmap);
+	aslot->pinned_bitmap = NULL;
+
+	kvfree(aslot->pfns);
+	aslot->pfns = NULL;
+}
+
+/*
+ * Set up the SEV pin-tracking metadata of @new.
+ *
+ * For a non-SEV VM this does nothing and returns 0.  If @old already carries
+ * both arrays they are handed over to @new as-is; otherwise a zeroed pfn array
+ * (one entry per page) and a zeroed bitmap (one bit per page) are allocated.
+ *
+ * Return: 0 on success, -ENOMEM if either allocation fails (in which case
+ * nothing is left allocated in @new by this function).
+ */
+int sev_alloc_memslot_metadata(struct kvm *kvm,
+			       const struct kvm_memory_slot *old,
+			       struct kvm_memory_slot *new)
+{
+	struct kvm_arch_memory_slot *aslot = &new->arch;
+
+	if (!sev_guest(kvm))
+		return 0;
+
+	/*
+	 * Reuse the arrays of the slot being replaced.
+	 * NOTE(review): @new and @old now point at the same memory; confirm
+	 * that the old slot is not also passed to sev_free_memslot().
+	 */
+	if (old && old->arch.pinned_bitmap && old->arch.pfns) {
+		WARN_ON(old->npages != new->npages);
+		aslot->pinned_bitmap = old->arch.pinned_bitmap;
+		aslot->pfns = old->arch.pfns;
+		return 0;
+	}
+
+	aslot->pfns = kvcalloc(new->npages, sizeof(*aslot->pfns),
+			       GFP_KERNEL_ACCOUNT);
+	if (!aslot->pfns)
+		return -ENOMEM;
+
+	/* A bitmap needs one bit per page, not one kvm_pfn_t per page. */
+	aslot->pinned_bitmap = kvcalloc(BITS_TO_LONGS(new->npages),
+					sizeof(*aslot->pinned_bitmap),
+					GFP_KERNEL_ACCOUNT);
+	if (!aslot->pinned_bitmap) {
+		kvfree(aslot->pfns);
+		aslot->pfns = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index fd3a00c892c7..ec06421cb532 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4658,6 +4658,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.complete_emulated_msr = svm_complete_emulated_msr,

.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
+
+ .alloc_memslot_metadata = sev_alloc_memslot_metadata,
+ .free_memslot = sev_free_memslot,
};

/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index fa98d6844728..f00364020d7e 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -616,4 +616,10 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);
void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);

+int sev_alloc_memslot_metadata(struct kvm *kvm,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new);
+void sev_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot);
+
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 82a9dcd8c67f..95070aaa1636 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11796,6 +11796,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
}

kvm_page_track_free_memslot(slot);
+ static_call_cond(kvm_x86_free_memslot)(kvm, slot);
}

int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
@@ -11821,6 +11822,7 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
}

static int kvm_alloc_memslot_metadata(struct kvm *kvm,
+ const struct kvm_memory_slot *old,
struct kvm_memory_slot *slot)
{
unsigned long npages = slot->npages;
@@ -11873,8 +11875,15 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
if (kvm_page_track_create_memslot(kvm, slot, npages))
goto out_free;

+ if (kvm_x86_ops.alloc_memslot_metadata &&
+ static_call(kvm_x86_alloc_memslot_metadata)(kvm, old, slot))
+ goto out_free_page_track;
+
return 0;

+out_free_page_track:
+ kvm_page_track_free_memslot(slot);
+
out_free:
memslot_rmap_free(slot);

@@ -11907,7 +11916,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
enum kvm_mr_change change)
{
if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
- return kvm_alloc_memslot_metadata(kvm, new);
+ return kvm_alloc_memslot_metadata(kvm, old, new);

if (change == KVM_MR_FLAGS_ONLY)
memcpy(&new->arch, &old->arch, sizeof(old->arch));
--
2.32.0