RE: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference

From: Wu, Feng
Date: Fri Aug 07 2015 - 01:43:41 EST




> -----Original Message-----
> From: kvm-owner@xxxxxxxxxxxxxxx [mailto:kvm-owner@xxxxxxxxxxxxxxx] On
> Behalf Of Paolo Bonzini
> Sent: Wednesday, August 05, 2015 11:24 PM
> To: linux-kernel@xxxxxxxxxxxxxxx; kvm@xxxxxxxxxxxxxxx
> Cc: Steve Rutherford; rkrcmar@xxxxxxxxxx
> Subject: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference
>
> From: Steve Rutherford <srutherford@xxxxxxxxxx>
>
> In order to support a userspace IOAPIC interacting with an in kernel
> APIC, the EOI exit bitmaps need to be configurable.
>
> If the IOAPIC is in userspace (i.e. the irqchip has been split), the
> EOI exit bitmaps will be set whenever the GSI Routes are configured.
> In particular, for the low MSI routes are reservable for userspace
> IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
> destination vector of the route will be set for the destination VCPU.
>
> The intention is for the userspace IOAPICs to use the reservable MSI
> routes to inject interrupts into the guest.
>
> This is a slight abuse of the notion of an MSI Route, given that MSIs
> classically bypass the IOAPIC. It might be worthwhile to add an
> additional route type to improve clarity.
>
> Compile tested for Intel x86.
>
> Signed-off-by: Steve Rutherford <srutherford@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> Documentation/virtual/kvm/api.txt | 9 ++++++---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/ioapic.h | 2 ++
> arch/x86/kvm/irq_comm.c | 42
> +++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/lapic.c | 3 +--
> arch/x86/kvm/x86.c | 9 ++++++++-
> include/linux/kvm_host.h | 17 ++++++++++++++++
> virt/kvm/irqchip.c | 12 ++---------
> 8 files changed, 79 insertions(+), 16 deletions(-)
>
> diff --git a/Documentation/virtual/kvm/api.txt
> b/Documentation/virtual/kvm/api.txt
> index bda6cb747b23..dcd748e2d46d 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3635,7 +3635,7 @@ KVM handlers should exit to userspace with rc =
> -EREMOTE.
> 7.5 KVM_CAP_SPLIT_IRQCHIP
>
> Architectures: x86
> -Parameters: None
> +Parameters: args[0] - number of routes reserved for userspace IOAPICs
> Returns: 0 on success, -1 on error
>
> Create a local apic for each processor in the kernel. This can be used
> @@ -3643,8 +3643,11 @@ instead of KVM_CREATE_IRQCHIP if the userspace
> VMM wishes to emulate the
> IOAPIC and PIC (and also the PIT, even though this has to be enabled
> separately).
>
> -This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in
> kernel
> -IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
> +This capability also enables in kernel routing of interrupt requests;
> +when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type
> are
> +used in the IRQ routing table. The first args[0] MSI routes are reserved
> +for the IOAPIC pins. Whenever the LAPIC receives an EOI for these routes,
> +a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
>
> Fails if VCPU has already been created, or if the irqchip is already in the
> kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index 4294722dfd1d..4bc714f7b164 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -687,6 +687,7 @@ struct kvm_arch {
> u64 disabled_quirks;
>
> bool irqchip_split;
> + u8 nr_reserved_ioapic_pins;
> };
>
> struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
> index a8842c0dee73..084617d37c74 100644
> --- a/arch/x86/kvm/ioapic.h
> +++ b/arch/x86/kvm/ioapic.h
> @@ -9,6 +9,7 @@ struct kvm;
> struct kvm_vcpu;
>
> #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
> +#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
> #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
> #define IOAPIC_EDGE_TRIG 0
> #define IOAPIC_LEVEL_TRIG 1
> @@ -121,5 +122,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
> kvm_lapic *src,
> int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
> int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
> void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
> +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
>
> #endif
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 67f6b62a6814..177460998bb0 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
> {
> return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
> }
> +
> +void kvm_arch_irq_routing_update(struct kvm *kvm)
> +{
> + if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
> + return;
> + kvm_make_scan_ioapic_request(kvm);
> +}
> +
> +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
> +{
> + struct kvm *kvm = vcpu->kvm;
> + struct kvm_kernel_irq_routing_entry *entry;
> + struct kvm_irq_routing_table *table;
> + u32 i, nr_ioapic_pins;
> + int idx;
> +
> + /* kvm->irq_routing must be read after clearing
> + * KVM_SCAN_IOAPIC. */
> + smp_mb();
> + idx = srcu_read_lock(&kvm->irq_srcu);
> + table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
> + nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
> + kvm->arch.nr_reserved_ioapic_pins);
> + for (i = 0; i < nr_ioapic_pins; ++i) {
> + hlist_for_each_entry(entry, &table->map[i], link) {
> + u32 dest_id, dest_mode;
> +
> + if (entry->type != KVM_IRQ_ROUTING_MSI)
> + continue;
> + dest_id = (entry->msi.address_lo >> 12) & 0xff;
> + dest_mode = (entry->msi.address_lo >> 2) & 0x1;
> + if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
> + dest_mode)) {
> + u32 vector = entry->msi.data & 0xff;
> +
> + __set_bit(vector,
> + (unsigned long *) eoi_exit_bitmap);
> + }
> + }
> + }
> + srcu_read_unlock(&kvm->irq_srcu, idx);
> +}
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 14b5603ef6c5..38c580aa27c2 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -209,8 +209,7 @@ out:
> if (old)
> kfree_rcu(old, rcu);
>
> - if (ioapic_in_kernel(kvm))
> - kvm_vcpu_request_scan_ioapic(kvm);
> + kvm_make_scan_ioapic_request(kvm);
> }
>
> static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 16e2f3c577c7..c9fb11491aa3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3571,6 +3571,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm
> *kvm,
> break;
> case KVM_CAP_SPLIT_IRQCHIP: {
> mutex_lock(&kvm->lock);
> + r = -EINVAL;
> + if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
> + goto split_irqchip_unlock;
> r = -EEXIST;
> if (irqchip_in_kernel(kvm))
> goto split_irqchip_unlock;
> @@ -3582,6 +3585,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm
> *kvm,
> /* Pairs with irqchip_in_kernel. */
> smp_wmb();
> kvm->arch.irqchip_split = true;
> + kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
> r = 0;
> split_irqchip_unlock:
> mutex_unlock(&kvm->lock);
> @@ -6173,7 +6177,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu
> *vcpu)
>
> memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8);
>
> - kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
> + if (irqchip_split(vcpu->kvm))
> + kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap);
> + else
> + kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
> kvm_x86_ops->load_eoi_exitmap(vcpu);
> }
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 653d494e13d1..27ccdf91a465 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -329,6 +329,19 @@ struct kvm_kernel_irq_routing_entry {
> struct hlist_node link;
> };
>
> +#ifdef CONFIG_HAVE_KVM_IRQCHIP
> +struct kvm_irq_routing_table {
> + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> + struct kvm_kernel_irq_routing_entry *rt_entries;

This filed doesn't exist anymore. In fact, this changes is also in my
VT-d PI patches. If this series get merged first, I can rebase my
patches then.

Thanks,
Feng

> + u32 nr_rt_entries;
> + /*
> + * Array indexed by gsi. Each entry contains list of irq chips
> + * the gsi is connected to.
> + */
> + struct hlist_head map[0];
> +};
> +#endif
> +
> #ifndef KVM_PRIVATE_MEM_SLOTS
> #define KVM_PRIVATE_MEM_SLOTS 0
> #endif
> @@ -455,10 +468,14 @@ void vcpu_put(struct kvm_vcpu *vcpu);
>
> #ifdef __KVM_HAVE_IOAPIC
> void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
> +void kvm_arch_irq_routing_update(struct kvm *kvm);
> #else
> static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
> {
> }
> +static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
> +{
> +}
> #endif
>
> #ifdef CONFIG_HAVE_KVM_IRQFD
> diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
> index 21c14244f4c4..4f85c6ee96dc 100644
> --- a/virt/kvm/irqchip.c
> +++ b/virt/kvm/irqchip.c
> @@ -31,16 +31,6 @@
> #include <trace/events/kvm.h>
> #include "irq.h"
>
> -struct kvm_irq_routing_table {
> - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> - u32 nr_rt_entries;
> - /*
> - * Array indexed by gsi. Each entry contains list of irq chips
> - * the gsi is connected to.
> - */
> - struct hlist_head map[0];
> -};
> -
> int kvm_irq_map_gsi(struct kvm *kvm,
> struct kvm_kernel_irq_routing_entry *entries, int gsi)
> {
> @@ -227,6 +217,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
> kvm_irq_routing_update(kvm);
> mutex_unlock(&kvm->irq_lock);
>
> + kvm_arch_irq_routing_update(kvm);
> +
> synchronize_srcu_expedited(&kvm->irq_srcu);
>
> new = old;
> --
> 1.8.3.1
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/