Re: [PATCH v2] KVM: x86: nSVM/nVMX: Fix handling triple fault on RSM instruction

From: Sean Christopherson
Date: Fri Feb 09 2024 - 10:12:54 EST


On Thu, Feb 08, 2024, Paolo Bonzini wrote:
> On Thu, Feb 8, 2024 at 2:18 PM Wilczynski, Michal
> <michal.wilczynski@xxxxxxxxx> wrote:
> > Hi, I've tested the patch and it seems to work, both on Intel and AMD.
> > There was a problem with applying this chunk though:
> >
> > diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> > index ac8b7614e79d..3d18fa7db353 100644
> > --- a/arch/x86/include/asm/kvm-x86-ops.h
> > +++ b/arch/x86/include/asm/kvm-x86-ops.h
> > @@ -119,7 +119,8 @@ KVM_X86_OP(setup_mce)
> > #ifdef CONFIG_KVM_SMM
> > KVM_X86_OP(smi_allowed)
> > KVM_X86_OP() // <- This shouldn't be there I guess ?
> > -KVM_X86_OP(leave_smm)
> > +KVM_X86_OP(leave_smm_prepare)
> > +KVM_X86_OP(leave_smm_commit)
> > KVM_X86_OP(enable_smi_window)
> > #endif
> > KVM_X86_OP_OPTIONAL(dev_get_attr)
> >
> > Anyway I was a bit averse to this approach as I noticed in the git log
> > that callbacks like e.g. post_leave_smm() used to exist, but they were later
> > removed, so I thought the maintainers don't like introducing extra
> > callbacks.
>
> If they are needed, it's fine. In my opinion a new callback is easier
> to handle and understand than new state.

Yeah, we ripped out post_leave_smm() because its sole usage at the time was buggy,
and having a callback without a purpose would just be dead code.

> > > 2) otherwise, if the problem is that we have not gone through the
> > > vmenter yet, then KVM needs to do that and _then_ inject the triple
> > > fault. The fix is to merge the .triple_fault and .check_nested_events
> > > callbacks, with something like the second attached patch - which
> > > probably has so many problems that I haven't even tried to compile it.
> >
> > Well, in this case, if we know that RSM will fail it doesn't seem to me
> > like it makes sense to run vmenter just to kill the VM anyway; this would
> > be more confusing.
>
> Note that the triple fault must not kill the VM; it just causes a
> nested vmexit from L2 to L1. KVM's algorithm to inject a
> vmexit-causing event is always to first ensure that the VMCS02 (VMCB02
> for AMD) is consistent, and only then trigger the vmexit. So if patch
> 2 or something like it works, that would be even better.
>
> > I've made the fix this way based on our discussion with Sean in v1, and
> > tried to mark the RSM instruction with a flag, as one that needs an
> > actual HW VM-Enter to complete successfully, and based on that information
> > manipulate nested_run_pending.

Heh, you misunderstood my suggestion.

: But due to nested_run_pending being (unnecessarily) buried in vendor structs, it
: might actually be easier to do a cleaner fix. E.g. add yet another flag to track
: that a hardware VM-Enter needs to be completed in order to complete instruction
: emulation.

I didn't mean add a flag to the emulator to muck with nested_run_pending, I meant
add a flag to kvm_vcpu_arch to be a superset of nested_run_pending. E.g. as a
first step, something like the below. And then as follow up, see if it's doable
to propagate nested_run_pending => insn_emulation_needs_vmenter so that the
nested_run_pending checks in {svm,vmx}_{interrupt,nmi,smi}_allowed() can be
dropped.
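
E.g. the follow-up could end up looking something like this (completely
untested sketch, and the exact touchpoints are a guess; every path that sets
nested_run_pending would need the same treatment):

  /* e.g. in nested_vmx_run(), next to where nested_run_pending is set */
  vmx->nested.nested_run_pending = 1;
  vcpu->arch.insn_emulation_needs_vmenter = true;

  /*
   * ...which would let vmx_interrupt_allowed() and friends drop their
   * explicit nested_run_pending checks, since the common wrappers below
   * already return -EBUSY while a VM-Enter is outstanding.
   */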

---
arch/x86/include/asm/kvm_host.h | 8 ++++++
arch/x86/kvm/smm.c | 10 ++++++--
arch/x86/kvm/x86.c | 44 +++++++++++++++++++++++++--------
3 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d271ba20a0b2..bb4250551619 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -769,6 +769,14 @@ struct kvm_vcpu_arch {
u64 ia32_misc_enable_msr;
u64 smbase;
u64 smi_count;
+
+ /*
+ * Tracks if a successful VM-Enter is needed to complete emulation of
+ * an instruction, e.g. to ensure emulation of RSM or nested VM-Enter,
+ * which can directly inject events, completes before KVM attempts to
+ * inject new events.
+ */
+ bool insn_emulation_needs_vmenter;
bool at_instruction_boundary;
bool tpr_access_reporting;
bool xfd_no_write_intercept;
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index dc3d95fdca7d..c6e597b8c794 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -640,8 +640,14 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)

#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- return rsm_load_state_64(ctxt, &smram.smram64);
+ ret = rsm_load_state_64(ctxt, &smram.smram64);
else
#endif
- return rsm_load_state_32(ctxt, &smram.smram32);
+ ret = rsm_load_state_32(ctxt, &smram.smram32);
+
+ if (ret != X86EMUL_CONTINUE)
+ return ret;
+
+ vcpu->arch.insn_emulation_needs_vmenter = true;
+ return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bf10a9073a09..21a7183bbf69 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10195,6 +10195,30 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
return kvm_x86_ops.nested_ops->check_events(vcpu);
}

+static int kvm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ if (vcpu->arch.insn_emulation_needs_vmenter)
+ return -EBUSY;
+
+ return static_call(kvm_x86_interrupt_allowed)(vcpu, for_injection);
+}
+
+static int kvm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ if (vcpu->arch.insn_emulation_needs_vmenter)
+ return -EBUSY;
+
+ return static_call(kvm_x86_smi_allowed)(vcpu, for_injection);
+}
+
+static int kvm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+{
+ if (vcpu->arch.insn_emulation_needs_vmenter)
+ return -EBUSY;
+
+ return static_call(kvm_x86_nmi_allowed)(vcpu, for_injection);
+}
+
static void kvm_inject_exception(struct kvm_vcpu *vcpu)
{
/*
@@ -10384,7 +10408,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
*/
#ifdef CONFIG_KVM_SMM
if (vcpu->arch.smi_pending) {
- r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
+ r = can_inject ? kvm_smi_allowed(vcpu, true) : -EBUSY;
if (r < 0)
goto out;
if (r) {
@@ -10398,7 +10422,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
#endif

if (vcpu->arch.nmi_pending) {
- r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
+ r = can_inject ? kvm_nmi_allowed(vcpu, true) : -EBUSY;
if (r < 0)
goto out;
if (r) {
@@ -10406,14 +10430,14 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
vcpu->arch.nmi_injected = true;
static_call(kvm_x86_inject_nmi)(vcpu);
can_inject = false;
- WARN_ON(static_call(kvm_x86_nmi_allowed)(vcpu, true) < 0);
+ WARN_ON(kvm_nmi_allowed(vcpu, true) < 0);
}
if (vcpu->arch.nmi_pending)
static_call(kvm_x86_enable_nmi_window)(vcpu);
}

if (kvm_cpu_has_injectable_intr(vcpu)) {
- r = can_inject ? static_call(kvm_x86_interrupt_allowed)(vcpu, true) : -EBUSY;
+ r = can_inject ? kvm_interrupt_allowed(vcpu, true) : -EBUSY;
if (r < 0)
goto out;
if (r) {
@@ -10422,7 +10446,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
if (!WARN_ON_ONCE(irq == -1)) {
kvm_queue_interrupt(vcpu, irq, false);
static_call(kvm_x86_inject_irq)(vcpu, false);
- WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
+ WARN_ON(kvm_interrupt_allowed(vcpu, true) < 0);
}
}
if (kvm_cpu_has_injectable_intr(vcpu))
@@ -10969,6 +10993,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
(kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));

+ vcpu->arch.insn_emulation_needs_vmenter = false;
+
exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
@@ -13051,14 +13077,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return true;

if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
- (vcpu->arch.nmi_pending &&
- static_call(kvm_x86_nmi_allowed)(vcpu, false)))
+ (vcpu->arch.nmi_pending && kvm_nmi_allowed(vcpu, false)))
return true;

#ifdef CONFIG_KVM_SMM
if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
- (vcpu->arch.smi_pending &&
- static_call(kvm_x86_smi_allowed)(vcpu, false)))
+ (vcpu->arch.smi_pending && kvm_smi_allowed(vcpu, false)))
return true;
#endif

@@ -13136,7 +13160,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)

int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- return static_call(kvm_x86_interrupt_allowed)(vcpu, false);
+ return kvm_interrupt_allowed(vcpu, false);
}

unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)

base-commit: f8fe663bc413d2a14ab9a452638a99b975011a9d
--