Re: [PATCH v3 11/11] KVM:SVM: Enable INVPCID feature on AMD

From: Jim Mattson
Date: Wed Jul 29 2020 - 19:01:34 EST


On Tue, Jul 28, 2020 at 4:39 PM Babu Moger <babu.moger@xxxxxxx> wrote:
>
> The following intercept bit has been added to support VMEXIT
> for INVPCID instruction:
> Code Name Cause
> A2h VMEXIT_INVPCID INVPCID instruction
>
> The following bit has been added to the VMCB layout control area
> to control intercept of INVPCID:
> Byte Offset Bit(s) Function
> 14h 2 intercept INVPCID
>
> Enable the interceptions when the guest is running with shadow
> page table enabled and handle the tlbflush based on the invpcid
> instruction type.
>
> For the guests with nested page table (NPT) support, the INVPCID
> feature works as running it natively. KVM does not need to do any
> special handling in this case.
>
> AMD documentation for INVPCID feature is available at "AMD64
> Architecture Programmer's Manual Volume 2: System Programming,
> Pub. 24593 Rev. 3.34(or later)"
>
> The documentation can be obtained at the links below:
> Link: https://www.amd.com/system/files/TechDocs/24593.pdf
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
>
> Signed-off-by: Babu Moger <babu.moger@xxxxxxx>
> ---
> arch/x86/include/uapi/asm/svm.h | 2 +
> arch/x86/kvm/svm/svm.c | 64 +++++++++++++++++++++++++++++++++++++++
> 2 files changed, 66 insertions(+)
>
> diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
> index 2e8a30f06c74..522d42dfc28c 100644
> --- a/arch/x86/include/uapi/asm/svm.h
> +++ b/arch/x86/include/uapi/asm/svm.h
> @@ -76,6 +76,7 @@
> #define SVM_EXIT_MWAIT_COND 0x08c
> #define SVM_EXIT_XSETBV 0x08d
> #define SVM_EXIT_RDPRU 0x08e
> +#define SVM_EXIT_INVPCID 0x0a2
> #define SVM_EXIT_NPF 0x400
> #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
> #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
> @@ -171,6 +172,7 @@
> { SVM_EXIT_MONITOR, "monitor" }, \
> { SVM_EXIT_MWAIT, "mwait" }, \
> { SVM_EXIT_XSETBV, "xsetbv" }, \
> + { SVM_EXIT_INVPCID, "invpcid" }, \
> { SVM_EXIT_NPF, "npf" }, \
> { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
> { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 99cc9c285fe6..6b099e0b28c0 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -813,6 +813,11 @@ static __init void svm_set_cpu_caps(void)
> if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
> boot_cpu_has(X86_FEATURE_AMD_SSBD))
> kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
> +
> + /* Enable INVPCID if both PCID and INVPCID enabled */
> + if (boot_cpu_has(X86_FEATURE_PCID) &&
> + boot_cpu_has(X86_FEATURE_INVPCID))
> + kvm_cpu_cap_set(X86_FEATURE_INVPCID);
> }

Why is PCID required? Can't this just be
'kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID);'?

> static __init int svm_hardware_setup(void)
> @@ -1099,6 +1104,18 @@ static void init_vmcb(struct vcpu_svm *svm)
> clr_intercept(svm, INTERCEPT_PAUSE);
> }
>
> + /*
> + * Intercept INVPCID instruction only if shadow page table is
> + * enabled. Interception is not required with nested page table
> + * enabled.
> + */
> + if (boot_cpu_has(X86_FEATURE_INVPCID)) {

Shouldn't this be 'kvm_cpu_cap_has(X86_FEATURE_INVPCID)', so that it
is consistent with the code above?

> + if (!npt_enabled)
> + set_intercept(svm, INTERCEPT_INVPCID);
> + else
> + clr_intercept(svm, INTERCEPT_INVPCID);
> + }
> +
> if (kvm_vcpu_apicv_active(&svm->vcpu))
> avic_init_vmcb(svm);
>
> @@ -2715,6 +2732,43 @@ static int mwait_interception(struct vcpu_svm *svm)
> return nop_interception(svm);
> }
>
> +static int invpcid_interception(struct vcpu_svm *svm)
> +{
> + struct kvm_vcpu *vcpu = &svm->vcpu;
> + struct x86_exception e;
> + unsigned long type;
> + gva_t gva;
> + struct {
> + u64 pcid;
> + u64 gla;
> + } operand;
> +
> + if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
> + kvm_queue_exception(vcpu, UD_VECTOR);
> + return 1;
> + }
> +
> + /*
> + * For an INVPCID intercept:
> + * EXITINFO1 provides the linear address of the memory operand.
> + * EXITINFO2 provides the contents of the register operand.
> + */
> + type = svm->vmcb->control.exit_info_2;
> + gva = svm->vmcb->control.exit_info_1;
> +
> + if (type > 3) {
> + kvm_inject_gp(vcpu, 0);
> + return 1;
> + }
> +
> + if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
> + kvm_inject_emulated_page_fault(vcpu, &e);
> + return 1;
> + }

The emulated page fault is not always correct. See commit
7a35e515a7055 ("KVM: VMX: Properly handle kvm_read/write_guest_virt*()
result"). I don't think the problems are only on the VMX side.

> +
> + return kvm_handle_invpcid(vcpu, type, operand.pcid, operand.gla);
> +}
> +
> static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
> [SVM_EXIT_READ_CR0] = cr_interception,
> [SVM_EXIT_READ_CR3] = cr_interception,
> @@ -2777,6 +2831,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
> [SVM_EXIT_MWAIT] = mwait_interception,
> [SVM_EXIT_XSETBV] = xsetbv_interception,
> [SVM_EXIT_RDPRU] = rdpru_interception,
> + [SVM_EXIT_INVPCID] = invpcid_interception,
> [SVM_EXIT_NPF] = npf_interception,
> [SVM_EXIT_RSM] = rsm_interception,
> [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
> @@ -3562,6 +3617,15 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
> svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
> guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
>
> + /* Check again if INVPCID interception is required */
> + if (boot_cpu_has(X86_FEATURE_INVPCID) &&

Again, shouldn't this be 'kvm_cpu_cap_has(X86_FEATURE_INVPCID)'?
(Better, perhaps, would be to extract this common block of code into a
separate function to be called from both places.)

> + guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
> + if (!npt_enabled)
> + set_intercept(svm, INTERCEPT_INVPCID);
> + else
> + clr_intercept(svm, INTERCEPT_INVPCID);
> + }
> +
> if (!kvm_vcpu_apicv_active(vcpu))
> return;
>
>