Re: [PATCH RFC v8 44/56] KVM: SVM: Add support to handle the RMP nested page fault

From: Zhi Wang
Date: Tue Feb 28 2023 - 14:12:07 EST


On Mon, 20 Feb 2023 12:38:35 -0600
Michael Roth <michael.roth@xxxxxxx> wrote:

> From: Brijesh Singh <brijesh.singh@xxxxxxx>
>
> When SEV-SNP is enabled in the guest, the hardware places restrictions
> on all memory accesses based on the contents of the RMP table. When the
> hardware encounters an RMP check failure caused by a guest memory
> access, it raises a #NPF. The error code contains additional
> information on the access type. See APM volume 2 for details.
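
The two RMP cases handled in the rest of this patch are distinguished by
the error-code bits used here as PFERR_GUEST_RMP_MASK and
PFERR_GUEST_SIZEM_MASK; a minimal sketch of the decode (illustrative
only, the real checks are in the hunks further down):

	/* Illustrative decode of the #NPF error code for the RMP cases. */
	if (error_code & PFERR_GUEST_RMP_MASK) {
		if (error_code & PFERR_GUEST_SIZEM_MASK) {
			/* PVALIDATE/RMPADJUST page-size mismatch: split the
			 * 2M RMP entry in the kernel. */
		} else {
			/* Implicit page state change: punt to userspace. */
		}
	}
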
>
> Page state changes are handled by userspace, so if an RMP fault is
> triggered by an implicit page state change in the guest, exit to
> userspace just as is done for explicit page-state change requests.
>
> RMP NPT faults can also occur if the guest PVALIDATEs at 4K granularity
> a page that is covered by a 2M RMP entry, in which case the 2M RMP
> entry needs to be PSMASH'd into 4K entries. Handle this case
> immediately in the kernel.
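
For the size-mismatch case, my reading of the trigger is: the guest
PVALIDATEs at 4K granularity a GPA that the host has backed with a 2M RMP
entry, and rather than the instruction failing inside the guest, the CPU
delivers an #NPF with the RMP and size-mismatch bits to the host.
Roughly, on the guest side (using the guest kernel's pvalidate() helper;
purely illustrative):

	/*
	 * Guest side, illustrative: validate a 4K page whose backing RMP
	 * entry on the host is 2M. As I understand the flow handled by this
	 * patch, this traps to the host as an RMP #NPF with the size-mismatch
	 * bit set; once the host PSMASHes the 2M RMP entry and resumes the
	 * guest, the PVALIDATE is re-executed and can then succeed.
	 */
	ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true);
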
>
> Co-developed-by: Michael Roth <michael.roth@xxxxxxx>
> Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
> Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
> Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
> ---
> arch/x86/kvm/svm/sev.c | 84 ++++++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/svm/svm.c | 21 +++++++++--
> arch/x86/kvm/svm/svm.h | 1 +
> 3 files changed, 102 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 102966c43e28..197b1f904567 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -3347,6 +3347,13 @@ static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
> svm->vmcb->control.ghcb_gpa = value;
> }
>
> +static int snp_rmptable_psmash(struct kvm *kvm, kvm_pfn_t pfn)
> +{
> + pfn = pfn & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
> +
> + return psmash(pfn);
> +}
> +
> /*
> * TODO: need to get the value set by userspace in vcpu->run->vmgexit.ghcb_msr
> * and process that here accordingly.
> @@ -3872,3 +3879,80 @@ void sev_adjust_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *le
> pr_debug("%s: GFN: 0x%llx, PFN: 0x%llx, level: %d, rmp_level: %d, level_orig: %d, assigned: %d\n",
> __func__, gfn, pfn, *level, rmp_level, level_orig, assigned);
> }
> +
> +void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
> +{
> + int order, rmp_level, assigned, ret;
> + struct kvm_memory_slot *slot;
> + struct kvm *kvm = vcpu->kvm;
> + kvm_pfn_t pfn;
> + gfn_t gfn;
> +
> + /*
> + * Private memslots punt handling of implicit page state changes to
^put
> + * userspace, so the only RMP faults expected here are for
> + * PFERR_GUEST_SIZEM_MASK. Anything else suggests that the RMP table has
> + * gotten out of sync with the private memslot.
> + *
> + * TODO: However, this case has also been noticed when an access occurs
> + * to an NPT mapping that has just been split/PSMASHED, in which case
> + * PFERR_GUEST_SIZEM_MASK might not be set. In those cases it should be
> + * safe to ignore and let the guest retry, but log these just in case
> + * for now.
> + */
> + if (!(error_code & PFERR_GUEST_SIZEM_MASK)) {
> + pr_warn_ratelimited("Unexpected RMP fault for GPA 0x%llx, error_code 0x%llx",
> + gpa, error_code);
> + return;
> + }
> +
> + gfn = gpa >> PAGE_SHIFT;
> +
> + /*
> + * Only RMPADJUST/PVALIDATE should cause PFERR_GUEST_SIZEM.
> + *
> + * For PVALIDATE, this should only happen if a guest PVALIDATEs a 4K GFN
> + * that is backed by a huge page in the host whose RMP entry has the
> + * hugepage/assigned bits set. With UPM, that should only ever happen
> + * for private pages.
> + *
> + * For RMPADJUST, this assumption might not hold, in which case handling
> + * for obtaining the PFN from HVA-backed memory may be needed. For now,
> + * just print warnings.
> + */
> + if (!kvm_mem_is_private(kvm, gfn)) {
> + pr_warn_ratelimited("Unexpected RMP fault, size-mismatch for non-private GPA 0x%llx\n",
> + gpa);
> + return;
> + }
> +
> + slot = gfn_to_memslot(kvm, gfn);
> + if (!kvm_slot_can_be_private(slot)) {
> + pr_warn_ratelimited("Unexpected RMP fault, non-private slot for GPA 0x%llx\n",
> + gpa);
> + return;
> + }
> +
> + ret = kvm_restrictedmem_get_pfn(slot, gfn, &pfn, &order);
> + if (ret) {
> + pr_warn_ratelimited("Unexpected RMP fault, no private backing page for GPA 0x%llx\n",
> + gpa);
> + return;
> + }
> +
> + assigned = snp_lookup_rmpentry(pfn, &rmp_level);
> + if (assigned != 1) {
> + pr_warn_ratelimited("Unexpected RMP fault, no assigned RMP entry for GPA 0x%llx\n",
> + gpa);
> + goto out;
> + }
> +
> + ret = snp_rmptable_psmash(kvm, pfn);
> + if (ret)
> + pr_err_ratelimited("Unable to split RMP entries for GPA 0x%llx PFN 0x%llx ret %d\n",
> + gpa, pfn, ret);
> +
> +out:
> + kvm_zap_gfn_range(kvm, gfn, gfn + PTRS_PER_PMD);
> + put_page(pfn_to_page(pfn));
> +}
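
For my own understanding of the PSMASH path: its effect should be visible
through snp_lookup_rmpentry() as the level of the entry covering the
faulting PFN changing from 2M to 4K, after which the guest's 4K PVALIDATE
can succeed on retry. A sketch of what I expect (illustrative only, not a
suggested change):

	int level;

	/* Before the split: the faulting 4K PFN sits inside a 2M RMP entry. */
	if (snp_lookup_rmpentry(pfn, &level) == 1 && level == PG_LEVEL_2M)
		pr_debug("pfn 0x%llx covered by a 2M RMP entry\n", pfn);

	psmash(pfn & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1));

	/* After the split: the same PFN should now have its own 4K RMP entry. */
	if (snp_lookup_rmpentry(pfn, &level) == 1 && level == PG_LEVEL_4K)
		pr_debug("pfn 0x%llx now has a 4K RMP entry\n", pfn);
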
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 9eb750c8b04c..f9ab4bf6d245 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1976,15 +1976,28 @@ static int pf_interception(struct kvm_vcpu *vcpu)
> static int npf_interception(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> + int rc;
>
> u64 fault_address = svm->vmcb->control.exit_info_2;
> u64 error_code = svm->vmcb->control.exit_info_1;
>
> trace_kvm_page_fault(vcpu, fault_address, error_code);
> - return kvm_mmu_page_fault(vcpu, fault_address, error_code,
> - static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
> - svm->vmcb->control.insn_bytes : NULL,
> - svm->vmcb->control.insn_len);
> + rc = kvm_mmu_page_fault(vcpu, fault_address, error_code,
> + static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
> + svm->vmcb->control.insn_bytes : NULL,
> + svm->vmcb->control.insn_len);
> +
> + /*
> + * rc == 0 indicates a userspace exit is needed to handle page
> + * transitions, so do that first before updating the RMP table.
> + */
> + if (error_code & PFERR_GUEST_RMP_MASK) {
> + if (rc == 0)
> + return rc;
> + handle_rmp_page_fault(vcpu, fault_address, error_code);
> + }
> +
> + return rc;
> }
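
One thing worth spelling out for this hunk is the kvm_mmu_page_fault()
return convention the new code relies on; my understanding (standard KVM
behavior, not something introduced here):

	/*
	 * Assumed return convention of kvm_mmu_page_fault():
	 *   rc < 0  : error, propagate to the caller
	 *   rc == 0 : an exit to userspace is pending (e.g. an implicit
	 *             page-state change for the VMM to convert), so the RMP
	 *             must not be touched here
	 *   rc > 0  : the guest will be resumed, so a size-mismatch RMP fault
	 *             has to be fixed up (PSMASH) before re-entry or the
	 *             guest would simply refault
	 */
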
>
> static int db_interception(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 0c655a4d32d5..13b00233b315 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -714,6 +714,7 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
> void sev_es_unmap_ghcb(struct vcpu_svm *svm);
> struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
> void sev_adjust_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *level);
> +void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
>
> /* vmenter.S */
>