Re: [PATCH 2/4] KVM: x86: SVM: disable preemption in avic_refresh_apicv_exec_ctrl

From: Sean Christopherson
Date: Tue Mar 01 2022 - 12:15:28 EST


On Tue, Mar 01, 2022, Maxim Levitsky wrote:
> avic_refresh_apicv_exec_ctrl is called from vcpu_enter_guest,
> without preemption disabled, however avic_vcpu_load, and
> avic_vcpu_put expect preemption to be disabled.
>
> This issue was found by lockdep.
>
> Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
> ---
> arch/x86/kvm/svm/avic.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index aea0b13773fd3..e23159f3a62ba 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -640,12 +640,16 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
> }
> vmcb_mark_dirty(vmcb, VMCB_AVIC);
>
> + preempt_disable();
> +
> if (activated)
> avic_vcpu_load(vcpu, vcpu->cpu);
> else
> avic_vcpu_put(vcpu);
>
> avic_set_pi_irte_mode(vcpu, activated);
> +
> + preempt_enable();

Assuming avic_set_pi_irte_mode() doesn't need to be protected, I'd prefer the
below patch. This is the second time we done messed this up.

From: Sean Christopherson <seanjc@xxxxxxxxxx>
Date: Tue, 1 Mar 2022 09:05:09 -0800
Subject: [PATCH] KVM: SVM: Disable preemption across AVIC load/put during
APICv refresh

Disable preemption when loading/putting the AVIC during an APICv refresh.
If the vCPU task is preempted and migrated ot a different pCPU, the
unprotected avic_vcpu_load() could set the wrong pCPU in the physical ID
cache/table.

Pull the necessary code out of avic_vcpu_{,un}blocking() and into a new
helper to reduce the probability of introducing this exact bug a third
time.

Fixes: df7e4827c549 ("KVM: SVM: call avic_vcpu_load/avic_vcpu_put when enabling/disabling AVIC")
Cc: stable@xxxxxxxxxxxxxxx
Reported-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/svm/avic.c | 103 ++++++++++++++++++++++------------------
arch/x86/kvm/svm/svm.c | 4 +-
arch/x86/kvm/svm/svm.h | 4 +-
3 files changed, 60 insertions(+), 51 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index aea0b13773fd..1afde44b1252 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -616,38 +616,6 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
return ret;
}

-void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *vmcb = svm->vmcb01.ptr;
- bool activated = kvm_vcpu_apicv_active(vcpu);
-
- if (!enable_apicv)
- return;
-
- if (activated) {
- /**
- * During AVIC temporary deactivation, guest could update
- * APIC ID, DFR and LDR registers, which would not be trapped
- * by avic_unaccelerated_access_interception(). In this case,
- * we need to check and update the AVIC logical APIC ID table
- * accordingly before re-activating.
- */
- avic_apicv_post_state_restore(vcpu);
- vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
- } else {
- vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
- }
- vmcb_mark_dirty(vmcb, VMCB_AVIC);
-
- if (activated)
- avic_vcpu_load(vcpu, vcpu->cpu);
- else
- avic_vcpu_put(vcpu);
-
- avic_set_pi_irte_mode(vcpu, activated);
-}
-
static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
unsigned long flags;
@@ -899,7 +867,7 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
return ret;
}

-void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
/* ID = 0xff (broadcast), ID > 0xff (reserved) */
@@ -936,7 +904,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}

-void avic_vcpu_put(struct kvm_vcpu *vcpu)
+void __avic_vcpu_put(struct kvm_vcpu *vcpu)
{
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
@@ -955,13 +923,63 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}

-void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
+static void avic_vcpu_load(struct kvm_vcpu *vcpu)
{
- if (!kvm_vcpu_apicv_active(vcpu))
- return;
+ int cpu = get_cpu();

+ WARN_ON(cpu != vcpu->cpu);
+
+ __avic_vcpu_load(vcpu, cpu);
+
+ put_cpu();
+}
+
+static void avic_vcpu_put(struct kvm_vcpu *vcpu)
+{
preempt_disable();

+ __avic_vcpu_put(vcpu);
+
+ preempt_enable();
+}
+
+void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *vmcb = svm->vmcb01.ptr;
+ bool activated = kvm_vcpu_apicv_active(vcpu);
+
+ if (!enable_apicv)
+ return;
+
+ if (activated) {
+ /**
+ * During AVIC temporary deactivation, guest could update
+ * APIC ID, DFR and LDR registers, which would not be trapped
+ * by avic_unaccelerated_access_interception(). In this case,
+ * we need to check and update the AVIC logical APIC ID table
+ * accordingly before re-activating.
+ */
+ avic_apicv_post_state_restore(vcpu);
+ vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
+ } else {
+ vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
+ }
+ vmcb_mark_dirty(vmcb, VMCB_AVIC);
+
+ if (activated)
+ avic_vcpu_load(vcpu);
+ else
+ avic_vcpu_put(vcpu);
+
+ avic_set_pi_irte_mode(vcpu, activated);
+}
+
+void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
+
/*
* Unload the AVIC when the vCPU is about to block, _before_
* the vCPU actually blocks.
@@ -976,21 +994,12 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
* the cause of errata #1235).
*/
avic_vcpu_put(vcpu);
-
- preempt_enable();
}

void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- int cpu;
-
if (!kvm_vcpu_apicv_active(vcpu))
return;

- cpu = get_cpu();
- WARN_ON(cpu != vcpu->cpu);
-
- avic_vcpu_load(vcpu, cpu);
-
- put_cpu();
+ avic_vcpu_load(vcpu);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7038c76fa841..c5e3f219803e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1318,13 +1318,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
indirect_branch_prediction_barrier();
}
if (kvm_vcpu_apicv_active(vcpu))
- avic_vcpu_load(vcpu, cpu);
+ __avic_vcpu_load(vcpu, cpu);
}

static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{
if (kvm_vcpu_apicv_active(vcpu))
- avic_vcpu_put(vcpu);
+ __avic_vcpu_put(vcpu);

svm_prepare_host_switch(vcpu);

diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 70850cbe5bcb..e45b5645d5e0 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -576,8 +576,8 @@ void avic_init_vmcb(struct vcpu_svm *svm);
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
int avic_init_vcpu(struct vcpu_svm *svm);
-void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-void avic_vcpu_put(struct kvm_vcpu *vcpu);
+void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void __avic_vcpu_put(struct kvm_vcpu *vcpu);
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);

base-commit: 44af02b939d6a6a166c9cd2d86d4c2a6959f0875
--