Re: [PATCH v2 07/31] KVM: x86: hyper-v: Create a separate ring for Direct TLB flush

From: Sean Christopherson
Date: Thu Apr 07 2022 - 13:58:03 EST


On Thu, Apr 07, 2022, Vitaly Kuznetsov wrote:
> To handle Direct TLB flush requests from L2 KVM needs to use a
> separate ring from regular Hyper-V TLB flush requests: e.g. when a
> request to flush something in L2 is made, the target vCPU can
> transition from L2 to L1, receive a request to flush a GVA for L1 and
> then try to enter L2 back. The first request needs to be processed
> then. Similarly, requests to flush GVAs in L1 must wait until L2
> exits to L1.
>
> No functional change yet as KVM doesn't handle Direct TLB flush
> requests from L2 yet.
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 3 ++-
> arch/x86/kvm/hyperv.c | 7 ++++---
> arch/x86/kvm/hyperv.h | 17 ++++++++++++++---
> 3 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 15d798fe280d..b8d7c1422da6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -617,7 +617,8 @@ struct kvm_vcpu_hv {
> u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
> } cpuid_cache;
>
> - struct kvm_vcpu_hv_tlbflush_ring tlb_flush_ring;

Probably feedback for a prior patch, but please be consistent in tlbflush vs
tlb_flush. I prefer the tlb_flush variant.

> + /* Two rings for regular Hyper-V TLB flush and Direct TLB flush */
> + struct kvm_vcpu_hv_tlbflush_ring tlb_flush_ring[2];

Use an enum, then the magic numbers go away, e.g.

enum hv_tlb_flush_rings {
HV_L1_TLB_FLUSH_RING,
HV_L2_TLB_FLUSH_RING,
HV_NR_TLB_FLUSH_RINGS,
};

> };
>
> /* Xen HVM per vcpu emulation context */
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 918642bcdbd0..16cbf41b5b7b 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -956,7 +956,8 @@ static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
>
> hv_vcpu->vp_index = vcpu->vcpu_idx;
>
> - spin_lock_init(&hv_vcpu->tlb_flush_ring.write_lock);
> + spin_lock_init(&hv_vcpu->tlb_flush_ring[0].write_lock);
> + spin_lock_init(&hv_vcpu->tlb_flush_ring[1].write_lock);

Or

for (i = 0; i < ARRAY_SIZE(hv_vcpu->tlb_flush_ring); i++)
spin_lock_init(&hv_vcpu->tlb_flush_ring[i].write_lock);

or replace ARRAY_SIZE() with HV_NR_TLB_FLUSH_RINGS.

>
> return 0;
> }
> @@ -1860,7 +1861,7 @@ static void hv_tlb_flush_ring_enqueue(struct kvm_vcpu *vcpu, bool flush_all,
> if (!hv_vcpu)
> return;
>
> - tlb_flush_ring = &hv_vcpu->tlb_flush_ring;
> + tlb_flush_ring = &hv_vcpu->tlb_flush_ring[0];

The [0] is gross, and it only gets worse in future patches that take @direct,
though this is slightly less gross:

/* Here's a comment explaining why this is hardcoded to L1's ring. */
tlb_flush_ring = &hv_vcpu->tlb_flush_ring[HV_L1_TLB_FLUSH_RING];

More thoughts in the patch that adds @direct.

> spin_lock_irqsave(&tlb_flush_ring->write_lock, flags);
>
> @@ -1920,7 +1921,7 @@ void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
> return;
> }
>
> - tlb_flush_ring = &hv_vcpu->tlb_flush_ring;
> + tlb_flush_ring = kvm_hv_get_tlb_flush_ring(vcpu);
> read_idx = READ_ONCE(tlb_flush_ring->read_idx);
> write_idx = READ_ONCE(tlb_flush_ring->write_idx);
>
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 6847caeaaf84..448877b478ef 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -22,6 +22,7 @@
> #define __ARCH_X86_KVM_HYPERV_H__
>
> #include <linux/kvm_host.h>
> +#include "x86.h"
>
> /*
> * The #defines related to the synthetic debugger are required by KDNet, but
> @@ -147,15 +148,25 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
> int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
> struct kvm_cpuid_entry2 __user *entries);
>
> +static inline struct kvm_vcpu_hv_tlbflush_ring *kvm_hv_get_tlb_flush_ring(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +
> + if (!is_guest_mode(vcpu))
> + return &hv_vcpu->tlb_flush_ring[0];

Maybe this?

int i = !is_guest_mode(vcpu) ? HV_L1_TLB_FLUSH_RING :
HV_L2_TLB_FLUSH_RING;

return &hv_vcpu->tlb_flush_ring[i];

Though shouldn't this be a WARN condition as of this patch? I.e. shouldn't it be
impossible to request a flush for L2 at this point?

> +
> + return &hv_vcpu->tlb_flush_ring[1];
> +}
>
> static inline void kvm_hv_vcpu_empty_flush_tlb(struct kvm_vcpu *vcpu)
> {
> - struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> + struct kvm_vcpu_hv_tlbflush_ring *tlb_flush_ring;
>
> - if (!hv_vcpu)
> + if (!to_hv_vcpu(vcpu))
> return;
>
> - hv_vcpu->tlb_flush_ring.read_idx = hv_vcpu->tlb_flush_ring.write_idx;
> + tlb_flush_ring = kvm_hv_get_tlb_flush_ring(vcpu);
> + tlb_flush_ring->read_idx = tlb_flush_ring->write_idx;
> }
> void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
>
> --
> 2.35.1
>