Re: [PATCH 06/10] KVM: x86: do not load vmcs12 pages while still in SMM

From: Jim Mattson
Date: Mon Jul 30 2018 - 15:27:52 EST


On Sat, Jul 28, 2018 at 4:10 PM, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
> If the vCPU enters system management mode while running a nested guest,
> RSM starts processing the vmentry while still in SMM. In that case,
> however, the pages pointed to by the vmcs12 might be incorrectly
> loaded from SMRAM. To avoid this, delay the handling of the pages
> until just before the next vmentry. This is done with a new request
> and a new entry in kvm_x86_ops, which we will be able to reuse for
> nested VMX state migration.
>
> Extracted from a patch by Jim Mattson and KarimAllah Ahmed.
>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 3 +++
> arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++--------------
> arch/x86/kvm/x86.c | 2 ++
> 3 files changed, 40 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c13cd28d9d1b..da957725992d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -75,6 +75,7 @@
> #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
> #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
> #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
> +#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
>
> #define CR0_RESERVED_BITS \
> (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
> @@ -1085,6 +1086,8 @@ struct kvm_x86_ops {
>
> void (*setup_mce)(struct kvm_vcpu *vcpu);
>
> + void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
> +
> int (*smi_allowed)(struct kvm_vcpu *vcpu);
> int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
> int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2630ab38d72c..17aede06ae0e 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -10636,9 +10636,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
> static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
> struct vmcs12 *vmcs12);
>
> -static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
> - struct vmcs12 *vmcs12)
> +static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
> {
> + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> struct page *page;
> u64 hpa;
> @@ -11750,13 +11750,18 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> return 0;
> }
>
> -static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
> +/*
> + * If p_exit_qual is NULL, this is being called from state restore (either
> + * kvm_set_nested_state or RSM). Otherwise it's called from vmlaunch/vmresume.
> + */
> +static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *p_exit_qual)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> + bool from_vmentry = !!p_exit_qual;
> u32 msr_entry_idx;
> - u32 exit_qual;
> - int r;
> + u32 dummy_exit_qual;
> + int r = 0;
>
> enter_guest_mode(vcpu);
>
> @@ -11770,17 +11775,27 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
> vcpu->arch.tsc_offset += vmcs12->tsc_offset;
>
> r = EXIT_REASON_INVALID_STATE;
> - if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
> + if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? p_exit_qual : &dummy_exit_qual))
> goto fail;
>
> - nested_get_vmcs12_pages(vcpu, vmcs12);
> + if (from_vmentry) {
> + nested_get_vmcs12_pages(vcpu);
>
> - r = EXIT_REASON_MSR_LOAD_FAIL;
> - msr_entry_idx = nested_vmx_load_msr(vcpu,
> - vmcs12->vm_entry_msr_load_addr,
> - vmcs12->vm_entry_msr_load_count);
> - if (msr_entry_idx)
> - goto fail;
> + r = EXIT_REASON_MSR_LOAD_FAIL;
> + msr_entry_idx = nested_vmx_load_msr(vcpu,
> + vmcs12->vm_entry_msr_load_addr,
> + vmcs12->vm_entry_msr_load_count);
> + if (msr_entry_idx)
> + goto fail;
> + } else {
> + /*
> + * The MMU is not initialized to point at the right entities yet and
> + * "get pages" would need to read data from the guest (i.e. we will
> + * need to perform gpa to hpa translation). Request a call
> + * to nested_get_vmcs12_pages before the next VM-entry.
> + */
> + kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
> + }

The from_vmentry variable seems unnecessary. Why not (a) hoist the
call to nested_vmx_load_msr() into nested_vmx_run and (b) always use
the deferred KVM_REQ_GET_VMCS12_PAGES?

>
> /*
> * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
> @@ -11795,8 +11810,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
> vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
> leave_guest_mode(vcpu);
> vmx_switch_vmcs(vcpu, &vmx->vmcs01);
> - nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual);
> - return 1;
> + return r;
> }
>
> /*
> @@ -11873,10 +11887,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
> */
>
> vmx->nested.nested_run_pending = 1;
> - ret = enter_vmx_non_root_mode(vcpu);
> + ret = enter_vmx_non_root_mode(vcpu, &exit_qual);
> if (ret) {
> + nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual);
> vmx->nested.nested_run_pending = 0;
> - return ret;
> + return 1;
> }
>
> /*
> @@ -12962,7 +12977,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
>
> if (vmx->nested.smm.guest_mode) {
> vcpu->arch.hflags &= ~HF_SMM_MASK;
> - ret = enter_vmx_non_root_mode(vcpu);
> + ret = enter_vmx_non_root_mode(vcpu, NULL);
> vcpu->arch.hflags |= HF_SMM_MASK;
> if (ret)
> return ret;
> @@ -13111,6 +13126,8 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
>
> .setup_mce = vmx_setup_mce,
>
> + .get_vmcs12_pages = nested_get_vmcs12_pages,
> +
> .smi_allowed = vmx_smi_allowed,
> .pre_enter_smm = vmx_pre_enter_smm,
> .pre_leave_smm = vmx_pre_leave_smm,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2b812b3c5088..8ddf5f94876f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7257,6 +7257,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> bool req_immediate_exit = false;
>
> if (kvm_request_pending(vcpu)) {
> + if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
> + kvm_x86_ops->get_vmcs12_pages(vcpu);
> if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
> kvm_mmu_unload(vcpu);
> if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
> --
> 1.8.3.1
>