Re: [PATCH 5.5 111/176] KVM: nVMX: Emulate MTF when performing instruction emulation

From: Oliver Upton
Date: Wed Mar 04 2020 - 02:39:56 EST


On Tue, Mar 3, 2020 at 11:23 PM Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
>
> On 03/03/20 18:42, Greg Kroah-Hartman wrote:
> > From: Oliver Upton <oupton@xxxxxxxxxx>
> >
> > commit 5ef8acbdd687c9d72582e2c05c0b9756efb37863 upstream.
> >
> > Since commit 5f3d45e7f282 ("kvm/x86: add support for
> > MONITOR_TRAP_FLAG"), KVM has allowed an L1 guest to use the monitor trap
> > flag processor-based execution control for its L2 guest. KVM simply
> > forwards any MTF VM-exits to the L1 guest, which works for normal
> > instruction execution.
> >
> > However, when KVM needs to emulate an instruction on behalf of an L2
> > guest, the monitor trap flag is not emulated. Add the necessary logic to
> > kvm_skip_emulated_instruction() to synthesize an MTF VM-exit to L1 upon
> > instruction emulation for L2.
> >
> > Fixes: 5f3d45e7f282 ("kvm/x86: add support for MONITOR_TRAP_FLAG")
> > Signed-off-by: Oliver Upton <oupton@xxxxxxxxxx>
> > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> > Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
>
> Why is this included in a stable release? It was part of a series of
> four patches and the prerequisites as far as I can see are not part of 5.5.

It looks like these commits were already picked from upstream:

684c0422da71 ("KVM: nVMX: Handle pending #DB when injecting INIT VM-exit")
307f1cfa2696 ("KVM: x86: Mask off reserved bit from #DB exception payload")

Those were bug fixes in their own right and were sensible to
backport (though, if I'm not mistaken, I didn't Cc stable).

However, this one was not picked:

a06230b62b89 ("KVM: x86: Deliver exception payload on KVM_GET_VCPU_EVENTS")

which this patch absolutely depends on.

Otherwise, I'll defer discussions regarding the suitability of this
patch for stable to Paolo.

--
Thanks,
Oliver

> I have already said half a dozen times that I don't want any of the
> autopick stuff for KVM. Is a Fixes tag sufficient to get patches into
> stable now?
>
> Paolo
>
> > ---
> > arch/x86/include/asm/kvm_host.h | 1 +
> > arch/x86/include/uapi/asm/kvm.h | 1 +
> > arch/x86/kvm/svm.c | 1 +
> > arch/x86/kvm/vmx/nested.c | 35 ++++++++++++++++++++++++++++++++++-
> > arch/x86/kvm/vmx/nested.h | 5 +++++
> > arch/x86/kvm/vmx/vmx.c | 37 ++++++++++++++++++++++++++++++++++++-
> > arch/x86/kvm/vmx/vmx.h | 3 +++
> > arch/x86/kvm/x86.c | 2 ++
> > 8 files changed, 83 insertions(+), 2 deletions(-)
> >
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1092,6 +1092,7 @@ struct kvm_x86_ops {
> > void (*run)(struct kvm_vcpu *vcpu);
> > int (*handle_exit)(struct kvm_vcpu *vcpu);
> > int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
> > + void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
> > void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
> > u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
> > void (*patch_hypercall)(struct kvm_vcpu *vcpu,
> > --- a/arch/x86/include/uapi/asm/kvm.h
> > +++ b/arch/x86/include/uapi/asm/kvm.h
> > @@ -390,6 +390,7 @@ struct kvm_sync_regs {
> > #define KVM_STATE_NESTED_GUEST_MODE 0x00000001
> > #define KVM_STATE_NESTED_RUN_PENDING 0x00000002
> > #define KVM_STATE_NESTED_EVMCS 0x00000004
> > +#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
> >
> > #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
> > #define KVM_STATE_NESTED_SMM_VMXON 0x00000002
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -7311,6 +7311,7 @@ static struct kvm_x86_ops svm_x86_ops __
> > .run = svm_vcpu_run,
> > .handle_exit = handle_exit,
> > .skip_emulated_instruction = skip_emulated_instruction,
> > + .update_emulated_instruction = NULL,
> > .set_interrupt_shadow = svm_set_interrupt_shadow,
> > .get_interrupt_shadow = svm_get_interrupt_shadow,
> > .patch_hypercall = svm_patch_hypercall,
> > --- a/arch/x86/kvm/vmx/nested.c
> > +++ b/arch/x86/kvm/vmx/nested.c
> > @@ -3616,8 +3616,15 @@ static int vmx_check_nested_events(struc
> > unsigned long exit_qual;
> > bool block_nested_events =
> > vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
> > + bool mtf_pending = vmx->nested.mtf_pending;
> > struct kvm_lapic *apic = vcpu->arch.apic;
> >
> > + /*
> > + * Clear the MTF state. If a higher priority VM-exit is delivered first,
> > + * this state is discarded.
> > + */
> > + vmx->nested.mtf_pending = false;
> > +
> > if (lapic_in_kernel(vcpu) &&
> > test_bit(KVM_APIC_INIT, &apic->pending_events)) {
> > if (block_nested_events)
> > @@ -3628,8 +3635,28 @@ static int vmx_check_nested_events(struc
> > return 0;
> > }
> >
> > + /*
> > + * Process any exceptions that are not debug traps before MTF.
> > + */
> > + if (vcpu->arch.exception.pending &&
> > + !vmx_pending_dbg_trap(vcpu) &&
> > + nested_vmx_check_exception(vcpu, &exit_qual)) {
> > + if (block_nested_events)
> > + return -EBUSY;
> > + nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
> > + return 0;
> > + }
> > +
> > + if (mtf_pending) {
> > + if (block_nested_events)
> > + return -EBUSY;
> > + nested_vmx_update_pending_dbg(vcpu);
> > + nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
> > + return 0;
> > + }
> > +
> > if (vcpu->arch.exception.pending &&
> > - nested_vmx_check_exception(vcpu, &exit_qual)) {
> > + nested_vmx_check_exception(vcpu, &exit_qual)) {
> > if (block_nested_events)
> > return -EBUSY;
> > nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
> > @@ -5742,6 +5769,9 @@ static int vmx_get_nested_state(struct k
> >
> > if (vmx->nested.nested_run_pending)
> > kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
> > +
> > + if (vmx->nested.mtf_pending)
> > + kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
> > }
> > }
> >
> > @@ -5922,6 +5952,9 @@ static int vmx_set_nested_state(struct k
> > vmx->nested.nested_run_pending =
> > !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
> >
> > + vmx->nested.mtf_pending =
> > + !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
> > +
> > ret = -EINVAL;
> > if (nested_cpu_has_shadow_vmcs(vmcs12) &&
> > vmcs12->vmcs_link_pointer != -1ull) {
> > --- a/arch/x86/kvm/vmx/nested.h
> > +++ b/arch/x86/kvm/vmx/nested.h
> > @@ -176,6 +176,11 @@ static inline bool nested_cpu_has_virtua
> > return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
> > }
> >
> > +static inline int nested_cpu_has_mtf(struct vmcs12 *vmcs12)
> > +{
> > + return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
> > +}
> > +
> > static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
> > {
> > return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -1595,6 +1595,40 @@ static int skip_emulated_instruction(str
> > return 1;
> > }
> >
> > +
> > +/*
> > + * Recognizes a pending MTF VM-exit and records the nested state for later
> > + * delivery.
> > + */
> > +static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
> > +{
> > + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> > + struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +
> > + if (!is_guest_mode(vcpu))
> > + return;
> > +
> > + /*
> > + * Per the SDM, MTF takes priority over debug-trap exceptions besides
> > + * T-bit traps. As instruction emulation is completed (i.e. at the
> > + * instruction boundary), any #DB exception pending delivery must be a
> > + * debug-trap. Record the pending MTF state to be delivered in
> > + * vmx_check_nested_events().
> > + */
> > + if (nested_cpu_has_mtf(vmcs12) &&
> > + (!vcpu->arch.exception.pending ||
> > + vcpu->arch.exception.nr == DB_VECTOR))
> > + vmx->nested.mtf_pending = true;
> > + else
> > + vmx->nested.mtf_pending = false;
> > +}
> > +
> > +static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
> > +{
> > + vmx_update_emulated_instruction(vcpu);
> > + return skip_emulated_instruction(vcpu);
> > +}
> > +
> > static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
> > {
> > /*
> > @@ -7886,7 +7920,8 @@ static struct kvm_x86_ops vmx_x86_ops __
> >
> > .run = vmx_vcpu_run,
> > .handle_exit = vmx_handle_exit,
> > - .skip_emulated_instruction = skip_emulated_instruction,
> > + .skip_emulated_instruction = vmx_skip_emulated_instruction,
> > + .update_emulated_instruction = vmx_update_emulated_instruction,
> > .set_interrupt_shadow = vmx_set_interrupt_shadow,
> > .get_interrupt_shadow = vmx_get_interrupt_shadow,
> > .patch_hypercall = vmx_patch_hypercall,
> > --- a/arch/x86/kvm/vmx/vmx.h
> > +++ b/arch/x86/kvm/vmx/vmx.h
> > @@ -150,6 +150,9 @@ struct nested_vmx {
> > /* L2 must run next, and mustn't decide to exit to L1. */
> > bool nested_run_pending;
> >
> > + /* Pending MTF VM-exit into L1. */
> > + bool mtf_pending;
> > +
> > struct loaded_vmcs vmcs02;
> >
> > /*
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -6838,6 +6838,8 @@ restart:
> > kvm_rip_write(vcpu, ctxt->eip);
> > if (r && ctxt->tf)
> > r = kvm_vcpu_do_singlestep(vcpu);
> > + if (kvm_x86_ops->update_emulated_instruction)
> > + kvm_x86_ops->update_emulated_instruction(vcpu);
> > __kvm_set_rflags(vcpu, ctxt->eflags);
> > }
> >
> >
> >
>