Re: [PATCH 2/4] KVM: x86: SVM: disable preemption in avic_refresh_apicv_exec_ctrl

From: Maxim Levitsky
Date: Tue Mar 01 2022 - 12:20:59 EST


On Tue, 2022-03-01 at 17:15 +0000, Sean Christopherson wrote:
> On Tue, Mar 01, 2022, Maxim Levitsky wrote:
> > avic_refresh_apicv_exec_ctrl() is called from vcpu_enter_guest()
> > without preemption disabled; however, avic_vcpu_load() and
> > avic_vcpu_put() expect preemption to be disabled.
> >
> > This issue was found by lockdep.
> >
> > Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
> > ---
> > arch/x86/kvm/svm/avic.c | 4 ++++
> > 1 file changed, 4 insertions(+)
> >
> > diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> > index aea0b13773fd3..e23159f3a62ba 100644
> > --- a/arch/x86/kvm/svm/avic.c
> > +++ b/arch/x86/kvm/svm/avic.c
> > @@ -640,12 +640,16 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
> > }
> > vmcb_mark_dirty(vmcb, VMCB_AVIC);
> >
> > + preempt_disable();
> > +
> > if (activated)
> > avic_vcpu_load(vcpu, vcpu->cpu);
> > else
> > avic_vcpu_put(vcpu);
> >
> > avic_set_pi_irte_mode(vcpu, activated);
> > +
> > + preempt_enable();
>
> Assuming avic_set_pi_irte_mode() doesn't need to be protected, I'd prefer
> the patch below. This is the second time we've messed this up.
>
> From: Sean Christopherson <seanjc@xxxxxxxxxx>
> Date: Tue, 1 Mar 2022 09:05:09 -0800
> Subject: [PATCH] KVM: SVM: Disable preemption across AVIC load/put during
> APICv refresh
>
> Disable preemption when loading/putting the AVIC during an APICv refresh.
> If the vCPU task is preempted and migrated to a different pCPU, the
> unprotected avic_vcpu_load() could set the wrong pCPU in the physical ID
> cache/table.
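>
> For illustration only, a rough sketch of the race (the pCPU numbers
> are hypothetical):
>
>   vCPU task on pCPU 0:
>     avic_refresh_apicv_exec_ctrl()
>       avic_vcpu_load(vcpu, vcpu->cpu)      /* vcpu->cpu == 0 */
>         <task preempted, migrated to pCPU 1>
>         <sched-in on pCPU 1 rewrites the entry for pCPU 1>
>       /* the resumed avic_vcpu_load() finishes with the stale cpu == 0,
>        * leaving the physical ID table pointing at the wrong pCPU */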
>
> Pull the necessary code out of avic_vcpu_{,un}blocking() and into a new
> helper to reduce the probability of introducing this exact bug a third
> time.
>
> Fixes: df7e4827c549 ("KVM: SVM: call avic_vcpu_load/avic_vcpu_put when enabling/disabling AVIC")
> Cc: stable@xxxxxxxxxxxxxxx
> Reported-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> arch/x86/kvm/svm/avic.c | 103 ++++++++++++++++++++++------------------
> arch/x86/kvm/svm/svm.c | 4 +-
> arch/x86/kvm/svm/svm.h | 4 +-
> 3 files changed, 60 insertions(+), 51 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index aea0b13773fd..1afde44b1252 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -616,38 +616,6 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
> return ret;
> }
>
> -void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
> -{
> - struct vcpu_svm *svm = to_svm(vcpu);
> - struct vmcb *vmcb = svm->vmcb01.ptr;
> - bool activated = kvm_vcpu_apicv_active(vcpu);
> -
> - if (!enable_apicv)
> - return;
> -
> - if (activated) {
> - /**
> - * During AVIC temporary deactivation, guest could update
> - * APIC ID, DFR and LDR registers, which would not be trapped
> - * by avic_unaccelerated_access_interception(). In this case,
> - * we need to check and update the AVIC logical APIC ID table
> - * accordingly before re-activating.
> - */
> - avic_apicv_post_state_restore(vcpu);
> - vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
> - } else {
> - vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
> - }
> - vmcb_mark_dirty(vmcb, VMCB_AVIC);
> -
> - if (activated)
> - avic_vcpu_load(vcpu, vcpu->cpu);
> - else
> - avic_vcpu_put(vcpu);
> -
> - avic_set_pi_irte_mode(vcpu, activated);
> -}
> -
> static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
> {
> unsigned long flags;
> @@ -899,7 +867,7 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
> return ret;
> }
>
> -void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> +void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> {
> u64 entry;
> /* ID = 0xff (broadcast), ID > 0xff (reserved) */
> @@ -936,7 +904,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
> }
>
> -void avic_vcpu_put(struct kvm_vcpu *vcpu)
> +void __avic_vcpu_put(struct kvm_vcpu *vcpu)
> {
> u64 entry;
> struct vcpu_svm *svm = to_svm(vcpu);
> @@ -955,13 +923,63 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
> WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
> }
>
> -void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
> +static void avic_vcpu_load(struct kvm_vcpu *vcpu)
> {
> - if (!kvm_vcpu_apicv_active(vcpu))
> - return;
> + int cpu = get_cpu();
>
> + WARN_ON(cpu != vcpu->cpu);
> +
> + __avic_vcpu_load(vcpu, cpu);
> +
> + put_cpu();
> +}
> +
> +static void avic_vcpu_put(struct kvm_vcpu *vcpu)
> +{
> preempt_disable();
>
> + __avic_vcpu_put(vcpu);
> +
> + preempt_enable();
> +}
> +
> +void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_svm *svm = to_svm(vcpu);
> + struct vmcb *vmcb = svm->vmcb01.ptr;
> + bool activated = kvm_vcpu_apicv_active(vcpu);
> +
> + if (!enable_apicv)
> + return;
> +
> + if (activated) {
> + /**
> + * During AVIC temporary deactivation, guest could update
> + * APIC ID, DFR and LDR registers, which would not be trapped
> + * by avic_unaccelerated_access_interception(). In this case,
> + * we need to check and update the AVIC logical APIC ID table
> + * accordingly before re-activating.
> + */
> + avic_apicv_post_state_restore(vcpu);
> + vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
> + } else {
> + vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
> + }
> + vmcb_mark_dirty(vmcb, VMCB_AVIC);
> +
> + if (activated)
> + avic_vcpu_load(vcpu);
> + else
> + avic_vcpu_put(vcpu);
> +
> + avic_set_pi_irte_mode(vcpu, activated);
> +}
> +
> +void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
> +{
> + if (!kvm_vcpu_apicv_active(vcpu))
> + return;
> +
> /*
> * Unload the AVIC when the vCPU is about to block, _before_
> * the vCPU actually blocks.
> @@ -976,21 +994,12 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
> * the cause of errata #1235).
> */
> avic_vcpu_put(vcpu);
> -
> - preempt_enable();
> }
>
> void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
> {
> - int cpu;
> -
> if (!kvm_vcpu_apicv_active(vcpu))
> return;
>
> - cpu = get_cpu();
> - WARN_ON(cpu != vcpu->cpu);
> -
> - avic_vcpu_load(vcpu, cpu);
> -
> - put_cpu();
> + avic_vcpu_load(vcpu);
> }
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 7038c76fa841..c5e3f219803e 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1318,13 +1318,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> indirect_branch_prediction_barrier();
> }
> if (kvm_vcpu_apicv_active(vcpu))
> - avic_vcpu_load(vcpu, cpu);
> + __avic_vcpu_load(vcpu, cpu);
> }
>
> static void svm_vcpu_put(struct kvm_vcpu *vcpu)
> {
> if (kvm_vcpu_apicv_active(vcpu))
> - avic_vcpu_put(vcpu);
> + __avic_vcpu_put(vcpu);
>
> svm_prepare_host_switch(vcpu);
>
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 70850cbe5bcb..e45b5645d5e0 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -576,8 +576,8 @@ void avic_init_vmcb(struct vcpu_svm *svm);
> int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
> int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
> int avic_init_vcpu(struct vcpu_svm *svm);
> -void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
> -void avic_vcpu_put(struct kvm_vcpu *vcpu);
> +void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
> +void __avic_vcpu_put(struct kvm_vcpu *vcpu);
> void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
> void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
> void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
>
> base-commit: 44af02b939d6a6a166c9cd2d86d4c2a6959f0875


I don't see how this patch is much different from what I proposed,
especially since preemption disabling can be nested.
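
To illustrate what I mean by nesting (just a sketch, not code from
either patch): preempt_disable()/preempt_enable() only bump
preempt_count up and down, so an inner pair inside a region that
already has preemption disabled is harmless:

    preempt_disable();                 /* preempt_count: 0 -> 1 */
    preempt_disable();                 /* nested: 1 -> 2, still fine */
    __avic_vcpu_load(vcpu, vcpu->cpu);
    preempt_enable();                  /* 2 -> 1, preemption stays off */
    preempt_enable();                  /* 1 -> 0, may reschedule here */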

But I won't be arguing with you about this.

Best regards,
Maxim Levitsky