Re: [PATCH 3/3] KVM: x86: hyperv: implement PV IPI send hypercalls

From: Wanpeng Li
Date: Thu Jun 28 2018 - 10:05:30 EST


On Sat, 23 Jun 2018 at 03:14, Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx> wrote:
>
> 2018-06-22 16:56+0200, Vitaly Kuznetsov:
> > Using hypercall for sending IPIs is faster because this allows to specify
> > any number of vCPUs (even > 64 with sparse CPU set), the whole procedure
> > will take only one VMEXIT.
> >
> > Current Hyper-V TLFS (v5.0b) claims that HvCallSendSyntheticClusterIpi
> > hypercall can't be 'fast' (passing parameters through registers) but
> > apparently this is not true, Windows always uses it as 'fast' so we need
> > to support that.
> >
> > Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
> > ---
> > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> > @@ -1357,6 +1357,108 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
> > ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
> > }
> >
> > +static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
> > + bool ex, bool fast)
> > +{
> > + struct kvm *kvm = current_vcpu->kvm;
> > + struct hv_send_ipi_ex send_ipi_ex;
> > + struct hv_send_ipi send_ipi;
> > + struct kvm_vcpu *vcpu;
> > + unsigned long valid_bank_mask = 0;
> > + u64 sparse_banks[64];
> > + int sparse_banks_len, i;
> > + struct kvm_lapic_irq irq = {0};
> > + bool all_cpus;
> > +
> > + if (!ex) {
> > + if (!fast) {
> > + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
> > + sizeof(send_ipi))))
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + sparse_banks[0] = send_ipi.cpu_mask;
> > + irq.vector = send_ipi.vector;
> > + } else {
> > + /* 'reserved' part of hv_send_ipi should be 0 */
> > + if (unlikely(ingpa >> 32 != 0))
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + sparse_banks[0] = outgpa;
> > + irq.vector = (u32)ingpa;
> > + }
> > + all_cpus = false;
> > +
> > + trace_kvm_hv_send_ipi(irq.vector, sparse_banks[0]);
> > + } else {
> > + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
> > + sizeof(send_ipi_ex))))
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > +
> > + trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
> > + send_ipi_ex.vp_set.format,
> > + send_ipi_ex.vp_set.valid_bank_mask);
> > +
> > + irq.vector = send_ipi_ex.vector;
> > + valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
> > + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
> > + sizeof(sparse_banks[0]);
> > + all_cpus = send_ipi_ex.vp_set.format !=
> > + HV_GENERIC_SET_SPARSE_4K;
>
> This would be much better readable as
>
> send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL
>
> And if Microsoft ever adds more formats, they won't be all VCPUs, so
> we're future-proofing as well.
>
> > +
> > + if (!sparse_banks_len)
> > + goto ret_success;
> > +
> > + if (!all_cpus &&
> > + kvm_read_guest(kvm,
> > + ingpa + offsetof(struct hv_send_ipi_ex,
> > + vp_set.bank_contents),
> > + sparse_banks,
> > + sparse_banks_len))
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + }
> > +
> > + if ((irq.vector < HV_IPI_LOW_VECTOR) ||
> > + (irq.vector > HV_IPI_HIGH_VECTOR))
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > +
> > + irq.delivery_mode = APIC_DM_FIXED;
>
> I'd set this during variable definition.
>
> APIC_DM_FIXED is 0 anyway and the compiler probably won't optimize it
> here due to function with side effects since definition.
>
> > +
> > + kvm_for_each_vcpu(i, vcpu, kvm) {
> > + struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
> > + int bank = hv->vp_index / 64, sbank = 0;
> > +
> > + if (!all_cpus) {
> > + /* Banks >64 can't be represented */
> > + if (bank >= 64)
> > + continue;
> > +
> > + /* Non-ex hypercalls can only address first 64 vCPUs */
> > + if (!ex && bank)
> > + continue;
> > +
> > + if (ex) {
> > + /*
> > + * Check is the bank of this vCPU is in sparse
> > + * set and get the sparse bank number.
> > + */
> > + sbank = get_sparse_bank_no(valid_bank_mask,
> > + bank);
> > +
> > + if (sbank < 0)
> > + continue;
> > + }
> > +
> > + if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
> > + continue;
> > + }
> > +
> > + /* We fail only when APIC is disabled */
> > + if (!kvm_apic_set_irq(vcpu, &irq, NULL))
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
>
> Does Windows use this even for 1 VCPU IPI?
>
> I'm thinking we could apply the same optimization we do for LAPIC -- RCU
> protected array that maps vp_index to vcpu.
>
> Thanks.
>
> > + }
> > +
> > +ret_success:
> > + return HV_STATUS_SUCCESS;
> > +}
> > +
> > bool kvm_hv_hypercall_enabled(struct kvm *kvm)
> > {
> > return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
> > @@ -1526,6 +1628,20 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
> > }
> > ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
> > break;
> > + case HVCALL_SEND_IPI:
> > + if (unlikely(rep)) {
> > + ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + break;
> > + }
> > + ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast);
> > + break;
> > + case HVCALL_SEND_IPI_EX:

Hi Paolo and Radim,

I have already completed the patches for linux guest/kvm/qemu w/ vCPUs
<= 64, however, extra complication as the ex in hyperv should be
introduced for vCPUs > 64, so do you think vCPU <=64 is enough for
linux guest or should me introduce two hypercall as what hyperv does
w/ ex logic?

Regards,
Wanpeng Li