[PATCH] KVM: SVM: Re-inject soft interrupts instead of retrying instruction

From: Sean Christopherson
Date: Fri Apr 01 2022 - 13:01:18 EST


Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/svm/svm.c | 101 +++++++++++++++++++++++++++++------------
arch/x86/kvm/svm/svm.h | 4 +-
2 files changed, 73 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2c86bd9176c6..c534d00ae194 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -370,6 +370,45 @@ static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
return 1;
}

+static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu)
+{
+ unsigned long rip, old_rip = kvm_rip_read(vcpu);
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ /*
+ * Due to architectural shortcomings, the CPU doesn't always provide
+ * NextRIP, e.g. if KVM intercepted an exception that occurred while
+ * the CPU was vectoring an INTO/INT3 in the guest. Temporarily skip
+ * the instruction even if NextRIP is supported to acquire the next
+ * RIP so that it can be shoved into the NextRIP field, otherwise
+ * hardware will fail to advance guest RIP during event injection.
+ * Return -EIO if the skip fails so that callers drop the event and
+ * effectively retry the instruction, it's the least awful option.
+ */
+ if (!svm_skip_emulated_instruction(vcpu))
+ return -EIO;
+
+ rip = kvm_rip_read(vcpu);
+
+ /*
+ * If NextRIP is supported, rewind RIP and update NextRIP. If NextRIP
+ * isn't supported, keep the result of the skip as the CPU obviously
+ * won't advance RIP, but stash away the injection information so that
+ * RIP can be unwound if injection fails.
+ */
+ if (nrips) {
+ kvm_rip_write(vcpu, old_rip);
+ svm->vmcb->control.next_rip = rip;
+ } else {
+ if (static_cpu_has(X86_FEATURE_NRIPS))
+ svm->vmcb->control.next_rip = rip;
+
+ svm->soft_int_linear_rip = rip + svm->vmcb->save.cs.base;
+ svm->soft_int_injected = rip - old_rip;
+ }
+ return 0;
+}
+
static void svm_queue_exception(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -379,21 +418,9 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)

kvm_deliver_exception_payload(vcpu);

- if (nr == BP_VECTOR && !nrips) {
- unsigned long rip, old_rip = kvm_rip_read(vcpu);
-
- /*
- * For guest debugging where we have to reinject #BP if some
- * INT3 is guest-owned:
- * Emulate nRIP by moving RIP forward. Will fail if injection
- * raises a fault that is not intercepted. Still better than
- * failing in all cases.
- */
- (void)svm_skip_emulated_instruction(vcpu);
- rip = kvm_rip_read(vcpu);
- svm->int3_rip = rip + svm->vmcb->save.cs.base;
- svm->int3_injected = rip - old_rip;
- }
+ if (kvm_exception_is_soft(nr) &&
+ svm_update_soft_interrupt_rip(vcpu))
+ return;

svm->vmcb->control.event_inj = nr
| SVM_EVTINJ_VALID
@@ -3382,14 +3409,24 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
static void svm_inject_irq(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ u32 type;

WARN_ON(!gif_set(svm));

+ if (vcpu->arch.interrupt.soft) {
+ if (svm_update_soft_interrupt_rip(vcpu))
+ return;
+
+ type = SVM_EVTINJ_TYPE_SOFT;
+ } else {
+ type = SVM_EVTINJ_TYPE_INTR;
+ }
+
trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
++vcpu->stat.irq_injections;

svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
- SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
+ SVM_EVTINJ_VALID | type;
}

void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
@@ -3672,9 +3709,9 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
u8 vector;
int type;
u32 exitintinfo = svm->vmcb->control.exit_int_info;
- unsigned int3_injected = svm->int3_injected;
+ unsigned soft_int_injected = svm->soft_int_injected;

- svm->int3_injected = 0;
+ svm->soft_int_injected = 0;

/*
* If we've made progress since setting HF_IRET_MASK, we've
@@ -3694,6 +3731,18 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
if (!(exitintinfo & SVM_EXITINTINFO_VALID))
return;

+ /*
+ * If NextRIP isn't enabled, KVM must manually advance RIP prior to
+ * injecting the soft exception/interrupt. That advancement needs to
+ * be unwound if vectoring didn't complete. Note, the _new_ event may
+ * not be the injected event, e.g. if KVM injected an INTn, the INTn
+ * hit a #NP in the guest, and the #NP encountered a #PF, the #NP will
+ * be the reported vectored event, but RIP still needs to be unwound.
+ */
+ if (soft_int_injected &&
+ kvm_is_linear_rip(vcpu, to_svm(vcpu)->soft_int_linear_rip))
+ kvm_rip_write(vcpu, kvm_rip_read(vcpu) - soft_int_injected);
+
kvm_make_request(KVM_REQ_EVENT, vcpu);

vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
@@ -3710,18 +3759,6 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
if (vector == X86_TRAP_VC)
break;

- /*
- * In case of software exceptions, do not reinject the vector,
- * but re-execute the instruction instead. Rewind RIP first
- * if we emulated INT3 before.
- */
- if (kvm_exception_is_soft(vector)) {
- if (vector == BP_VECTOR && int3_injected &&
- kvm_is_linear_rip(vcpu, svm->int3_rip))
- kvm_rip_write(vcpu,
- kvm_rip_read(vcpu) - int3_injected);
- break;
- }
if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
u32 err = svm->vmcb->control.exit_int_info_err;
kvm_requeue_exception_e(vcpu, vector, err);
@@ -3732,9 +3769,13 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
case SVM_EXITINTINFO_TYPE_INTR:
kvm_queue_interrupt(vcpu, vector, false);
break;
+ case SVM_EXITINTINFO_TYPE_SOFT:
+ kvm_queue_interrupt(vcpu, vector, true);
+ break;
default:
break;
}
+
}

static void svm_cancel_injection(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 47e7427d0395..a770a1c7ddd2 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -230,8 +230,8 @@ struct vcpu_svm {
bool nmi_singlestep;
u64 nmi_singlestep_guest_rflags;

- unsigned int3_injected;
- unsigned long int3_rip;
+ unsigned soft_int_injected;
+ unsigned long soft_int_linear_rip;

/* optional nested SVM features that are enabled for this guest */
bool nrips_enabled : 1;

base-commit: 26f97f8db06dc08a2b6a48692cdc1d89b288905d
--
2.35.1.1094.g7c7d902a7c-goog


--ZgLXG3WJga0Bzibl--