Re: [PATCH 12/13] KVM: x86: Replace late check_nested_events() hack with more precise fix

From: Paolo Bonzini
Date: Thu Apr 23 2020 - 07:00:22 EST


On 23/04/20 04:25, Sean Christopherson wrote:
> Add a separate hook for checking if interrupt injection is blocked and
> use the hook to handle the case where an interrupt arrives between
> check_nested_events() and the injection logic. Drop the retry of
> check_nested_events() that hack-a-fixed the same condition.
>
> Blocking injection is also a bit of a hack, e.g. KVM should do exiting
> and non-exiting interrupt processing in a single pass, but it's a more
> precise hack. The old comment is also misleading: KVM_REQ_EVENT is
> purely an optimization, and setting it on every run loop (which KVM
> doesn't do) would affect only performance, not functionality.
>
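For readers following along, the window being closed is roughly this
(my paraphrase of the old comment below, not code from the patch):

	inject_pending_event():
	    check_nested_events(vcpu);    /* no injectable IRQ yet, so no
	                                   * nested VM-Exit is synthesized */
	                                  /* <-- IRQ arrives asynchronously,
	                                   *     e.g. raised from another
	                                   *     thread */
	    if (kvm_cpu_has_injectable_intr(vcpu))
	            /* without the old re-check of nested events, or the
	             * new injection hook, the IRQ would be injected into
	             * L2 even though L1 wants to intercept it */
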
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/svm/svm.c | 1 +
> arch/x86/kvm/vmx/vmx.c | 13 +++++++++++++
> arch/x86/kvm/x86.c | 22 ++++------------------
> 4 files changed, 19 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 787636acd648..16fdeddb4a65 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1140,6 +1140,7 @@ struct kvm_x86_ops {
> void (*queue_exception)(struct kvm_vcpu *vcpu);
> void (*cancel_injection)(struct kvm_vcpu *vcpu);
> bool (*interrupt_allowed)(struct kvm_vcpu *vcpu);
> + bool (*interrupt_injection_allowed)(struct kvm_vcpu *vcpu);
> bool (*nmi_allowed)(struct kvm_vcpu *vcpu);
> bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
> void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index f21f734861dd..6d3ccbfc9e6a 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3993,6 +3993,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
> .queue_exception = svm_queue_exception,
> .cancel_injection = svm_cancel_injection,
> .interrupt_allowed = svm_interrupt_allowed,
> + .interrupt_injection_allowed = svm_interrupt_allowed,
> .nmi_allowed = svm_nmi_allowed,
> .get_nmi_mask = svm_get_nmi_mask,
> .set_nmi_mask = svm_set_nmi_mask,
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 2f8cacb3aa9b..68b3748b5383 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4550,6 +4550,18 @@ static bool vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
> return !vmx_interrupt_blocked(vcpu);
> }
>
> +static bool vmx_interrupt_injection_allowed(struct kvm_vcpu *vcpu)
> +{
> + /*
> + * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
> + * e.g. if the IRQ arrived asynchronously after checking nested events.
> + */
> + if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
> + return false;
> +
> + return vmx_interrupt_allowed(vcpu);
> +}
> +
> static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
> {
> int ret;
> @@ -7823,6 +7835,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
> .queue_exception = vmx_queue_exception,
> .cancel_injection = vmx_cancel_injection,
> .interrupt_allowed = vmx_interrupt_allowed,
> + .interrupt_injection_allowed = vmx_interrupt_injection_allowed,
> .nmi_allowed = vmx_nmi_allowed,
> .get_nmi_mask = vmx_get_nmi_mask,
> .set_nmi_mask = vmx_set_nmi_mask,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 7c49a7dc601f..d9d6028a77e0 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7755,24 +7755,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
> --vcpu->arch.nmi_pending;
> vcpu->arch.nmi_injected = true;
> kvm_x86_ops.set_nmi(vcpu);
> - } else if (kvm_cpu_has_injectable_intr(vcpu)) {
> - /*
> - * Because interrupts can be injected asynchronously, we are
> - * calling check_nested_events again here to avoid a race condition.
> - * See https://lkml.org/lkml/2014/7/2/60 for discussion about this
> - * proposal and current concerns. Perhaps we should be setting
> - * KVM_REQ_EVENT only on certain events and not unconditionally?
> - */
> - if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
> - r = kvm_x86_ops.check_nested_events(vcpu);
> - if (r != 0)
> - return r;
> - }
> - if (kvm_x86_ops.interrupt_allowed(vcpu)) {
> - kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> - false);
> - kvm_x86_ops.set_irq(vcpu);
> - }
> + } else if (kvm_cpu_has_injectable_intr(vcpu) &&
> + kvm_x86_ops.interrupt_injection_allowed(vcpu)) {
> + kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
> + kvm_x86_ops.set_irq(vcpu);

Hmm, I'm interested in how this can help with AMD, which is introducing
another instance of this random late check_nested_events() call. I'll
play with it.
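
If SVM ends up needing to diverge from svm_interrupt_allowed(), I'd
expect something along these lines (completely untested sketch; the
vmcb12 intercept check mimics what the nested code does today):

	static bool svm_interrupt_injection_allowed(struct kvm_vcpu *vcpu)
	{
		struct vcpu_svm *svm = to_svm(vcpu);

		/*
		 * Mirror the VMX logic: don't inject an IRQ into L2 if L1
		 * wants to intercept it, i.e. if vmcb12 intercepts INTR.
		 */
		if (is_guest_mode(vcpu) &&
		    (svm->nested.intercept & (1ULL << INTERCEPT_INTR)))
			return false;

		return svm_interrupt_allowed(vcpu);
	}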

Paolo