Re: [PATCH 06/10] KVM: x86: do not load vmcs12 pages while still in SMM

From: Paolo Bonzini
Date: Tue Jul 31 2018 - 03:40:06 EST


On 30/07/2018 21:27, Jim Mattson wrote:
> On Sat, Jul 28, 2018 at 4:10 PM, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
>> If the vCPU enters system management mode while running a nested guest,
>> RSM starts processing the vmentry while still in SMM. In that case,
>> however, the pages pointed to by the vmcs12 might be incorrectly
>> loaded from SMRAM. To avoid this, delay the handling of the pages
>> until just before the next vmentry. This is done with a new request
>> and a new entry in kvm_x86_ops, which we will be able to reuse for
>> nested VMX state migration.
>>
>> Extracted from a patch by Jim Mattson and KarimAllah Ahmed.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
>> ---
>> arch/x86/include/asm/kvm_host.h | 3 +++
>> arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++--------------
>> arch/x86/kvm/x86.c | 2 ++
>> 3 files changed, 40 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index c13cd28d9d1b..da957725992d 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -75,6 +75,7 @@
>> #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
>> #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
>> #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
>> +#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
>>
>> #define CR0_RESERVED_BITS \
>> (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
>> @@ -1085,6 +1086,8 @@ struct kvm_x86_ops {
>>
>> void (*setup_mce)(struct kvm_vcpu *vcpu);
>>
>> + void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
>> +
>> int (*smi_allowed)(struct kvm_vcpu *vcpu);
>> int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
>> int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 2630ab38d72c..17aede06ae0e 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -10636,9 +10636,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
>> static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
>> struct vmcs12 *vmcs12);
>>
>> -static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
>> - struct vmcs12 *vmcs12)
>> +static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
>> {
>> + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> struct page *page;
>> u64 hpa;
>> @@ -11750,13 +11750,18 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>> return 0;
>> }
>>
>> -static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
>> +/*
>> + * If p_exit_qual is NULL, this is being called from state restore (either
>> + * kvm_set_nested_state or RSM). Otherwise it's called from vmlaunch/vmresume.
>> + */
>> +static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *p_exit_qual)
>> {
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>> + bool from_vmentry = !!p_exit_qual;
>> u32 msr_entry_idx;
>> - u32 exit_qual;
>> - int r;
>> + u32 dummy_exit_qual;
>> + int r = 0;
>>
>> enter_guest_mode(vcpu);
>>
>> @@ -11770,17 +11775,27 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
>> vcpu->arch.tsc_offset += vmcs12->tsc_offset;
>>
>> r = EXIT_REASON_INVALID_STATE;
>> - if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
>> + if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? p_exit_qual : &dummy_exit_qual))
>> goto fail;
>>
>> - nested_get_vmcs12_pages(vcpu, vmcs12);
>> + if (from_vmentry) {
>> + nested_get_vmcs12_pages(vcpu);
>>
>> - r = EXIT_REASON_MSR_LOAD_FAIL;
>> - msr_entry_idx = nested_vmx_load_msr(vcpu,
>> - vmcs12->vm_entry_msr_load_addr,
>> - vmcs12->vm_entry_msr_load_count);
>> - if (msr_entry_idx)
>> - goto fail;
>> + r = EXIT_REASON_MSR_LOAD_FAIL;
>> + msr_entry_idx = nested_vmx_load_msr(vcpu,
>> + vmcs12->vm_entry_msr_load_addr,
>> + vmcs12->vm_entry_msr_load_count);
>> + if (msr_entry_idx)
>> + goto fail;
>> + } else {
>> + /*
>> + * The MMU is not initialized to point at the right entities yet and
>> + * "get pages" would need to read data from the guest (i.e. we will
>> + * need to perform gpa to hpa translation). Request a call
>> + * to nested_get_vmcs12_pages before the next VM-entry.
>> + */
>> + kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
>> + }
>
> The from_vmentry variable seems unnecessary. Why not (a) hoist the
> call to nested_vmx_load_msr() into nested_vmx_run and (b) always use
> the deferred KVM_REQ_GET_VMCS12_PAGES?

Yeah, I thought about that. from_vmentry is ugly.

However, for (a) you would have to duplicate the code at the "fail"
label, which is uglier than from_vmentry; as to (b), it is a little bit
slower and our nested vmentry is already slow enough...

Paolo