[PATCH v3 3/5] arm/arm64: KVM: support for un-queuing active IRQs

From: Alex BennÃe
Date: Fri Mar 13 2015 - 13:03:37 EST


From: Christoffer Dall <christoffer.dall@xxxxxxxxxx>

Migrating active interrupts causes the active state to be lost
completely. This implements some additional bitmaps to track the active
state on the distributor and export this to user space.

Signed-off-by: Christoffer Dall <christoffer.dall@xxxxxxxxxx>
Signed-off-by: Alex BennÃe <alex.bennee@xxxxxxxxxx>

---
AJB:
- fixed merge conflicts
- moved additional shared bitmaps to be dynamically allocated
- make irq_active_on_cpu dynamically allocated as well
- in vgic_queue_irq don't queue pending if already active
- in __kvm_vgic_flush_hwstate use pr_shared when checking SPIs
- vgic: clear active on CPU bit
- checkpatch, remove extraneous braces
- checkpatch, remove debug, fix overflows
- move register access fns to re-factored vgic-v2-emul.c
v2
- doc: unqueue and update_state
v3
- fix up after moving queue_lr function patch before here.
- minor wording fixes from review

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7c55dd5..7042251 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -196,6 +196,9 @@ struct vgic_dist {
/* Level-triggered interrupt queued on VCPU interface */
struct vgic_bitmap irq_queued;

+ /* Interrupt was active when unqueue from VCPU interface */
+ struct vgic_bitmap irq_active;
+
/* Interrupt priority. Not used yet. */
struct vgic_bytemap irq_priority;

@@ -236,6 +239,9 @@ struct vgic_dist {
/* Bitmap indicating which CPU has something pending */
unsigned long *irq_pending_on_cpu;

+ /* Bitmap indicating which CPU has active IRQs */
+ unsigned long *irq_active_on_cpu;
+
struct vgic_vm_ops vm_ops;
#endif
};
@@ -269,9 +275,15 @@ struct vgic_cpu {
/* per IRQ to LR mapping */
u8 *vgic_irq_lr_map;

- /* Pending interrupts on this VCPU */
+ /* Pending/active/both interrupts on this VCPU */
DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
+ DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS);
+ DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
+
+ /* Pending/active/both shared interrupts, dynamically sized */
unsigned long *pending_shared;
+ unsigned long *active_shared;
+ unsigned long *pend_act_shared;

/* Bitmap of used/free list registers */
DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
@@ -311,6 +323,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
bool level);
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
+int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm_exit_mmio *mmio);

diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index 19c6210..c818662 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -107,6 +107,22 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
vcpu->vcpu_id);
}

+static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset,
+ vcpu->vcpu_id);
+}
+
+static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset,
+ vcpu->vcpu_id);
+}
+
static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio,
phys_addr_t offset)
@@ -344,13 +360,13 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = {
.base = GIC_DIST_ACTIVE_SET,
.len = VGIC_MAX_IRQS / 8,
.bits_per_irq = 1,
- .handle_mmio = handle_mmio_raz_wi,
+ .handle_mmio = handle_mmio_set_active_reg,
},
{
.base = GIC_DIST_ACTIVE_CLEAR,
.len = VGIC_MAX_IRQS / 8,
.bits_per_irq = 1,
- .handle_mmio = handle_mmio_raz_wi,
+ .handle_mmio = handle_mmio_clear_active_reg,
},
{
.base = GIC_DIST_PRI,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 6682d58..22ff3bc 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -263,6 +263,13 @@ static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
}

+static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+}
+
static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -277,6 +284,20 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
}

+static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+}
+
static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -520,6 +541,44 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm,
return false;
}

+bool vgic_handle_set_active_reg(struct kvm *kvm,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, int vcpu_id)
+{
+ u32 *reg;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
+ if (mmio->is_write) {
+ vgic_update_state(kvm);
+ return true;
+ }
+
+ return false;
+}
+
+bool vgic_handle_clear_active_reg(struct kvm *kvm,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, int vcpu_id)
+{
+ u32 *reg;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
+ if (mmio->is_write) {
+ vgic_update_state(kvm);
+ return true;
+ }
+
+ return false;
+}
+
static u32 vgic_cfg_expand(u16 val)
{
u32 res = 0;
@@ -588,16 +647,12 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
}

/**
- * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
+ * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor
* @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
*
- * Move any pending IRQs that have already been assigned to LRs back to the
+ * Move any IRQs that have already been assigned to LRs back to the
* emulated distributor state so that the complete emulated state can be read
* from the main emulation structures without investigating the LRs.
- *
- * Note that IRQs in the active state in the LRs get their pending state moved
- * to the distributor but the active state stays in the LRs, because we don't
- * track the active state on the distributor side.
*/
void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
{
@@ -613,12 +668,22 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
* 01: pending
* 10: active
* 11: pending and active
- *
- * If the LR holds only an active interrupt (not pending) then
- * just leave it alone.
*/
- if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
- continue;
+ BUG_ON(!(lr.state & LR_STATE_MASK));
+
+ /* Reestablish SGI source for pending and active IRQs */
+ if (lr.irq < VGIC_NR_SGIS)
+ add_sgi_source(vcpu, lr.irq, lr.source);
+
+ /*
+ * If the LR holds an active (10) or a pending and active (11)
+ * interrupt then move the active state to the
+ * distributor tracking bit.
+ */
+ if (lr.state & LR_STATE_ACTIVE) {
+ vgic_irq_set_active(vcpu, lr.irq);
+ lr.state &= ~LR_STATE_ACTIVE;
+ }

/*
* Reestablish the pending state on the distributor and the
@@ -626,21 +691,19 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
* is fine, then we are only setting a few bits that were
* already set.
*/
- vgic_dist_irq_set_pending(vcpu, lr.irq);
- if (lr.irq < VGIC_NR_SGIS)
- add_sgi_source(vcpu, lr.irq, lr.source);
- lr.state &= ~LR_STATE_PENDING;
+ if (lr.state & LR_STATE_PENDING) {
+ vgic_dist_irq_set_pending(vcpu, lr.irq);
+ lr.state &= ~LR_STATE_PENDING;
+ }
+
vgic_set_lr(vcpu, i, lr);

/*
- * If there's no state left on the LR (it could still be
- * active), then the LR does not hold any useful info and can
- * be marked as free for other use.
+ * Mark the LR as free for other use.
*/
- if (!(lr.state & LR_STATE_MASK)) {
- vgic_retire_lr(i, lr.irq, vcpu);
- vgic_irq_clear_queued(vcpu, lr.irq);
- }
+ BUG_ON(lr.state & LR_STATE_MASK);
+ vgic_retire_lr(i, lr.irq, vcpu);
+ vgic_irq_clear_queued(vcpu, lr.irq);

/* Finally update the VGIC state. */
vgic_update_state(vcpu->kvm);
@@ -804,6 +867,36 @@ static int vgic_nr_shared_irqs(struct vgic_dist *dist)
return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
}

+static int compute_active_for_cpu(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long *active, *enabled, *act_percpu, *act_shared;
+ unsigned long active_private, active_shared;
+ int nr_shared = vgic_nr_shared_irqs(dist);
+ int vcpu_id;
+
+ vcpu_id = vcpu->vcpu_id;
+ act_percpu = vcpu->arch.vgic_cpu.active_percpu;
+ act_shared = vcpu->arch.vgic_cpu.active_shared;
+
+ active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id);
+ enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
+ bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS);
+
+ active = vgic_bitmap_get_shared_map(&dist->irq_active);
+ enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
+ bitmap_and(act_shared, active, enabled, nr_shared);
+ bitmap_and(act_shared, act_shared,
+ vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
+ nr_shared);
+
+ active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS);
+ active_shared = find_first_bit(act_shared, nr_shared);
+
+ return (active_private < VGIC_NR_PRIVATE_IRQS ||
+ active_shared < nr_shared);
+}
+
static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -835,7 +928,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)

/*
* Update the interrupt state and determine which CPUs have pending
- * interrupts. Must be called with distributor lock held.
+ * or active interrupts. Must be called with distributor lock held.
*/
void vgic_update_state(struct kvm *kvm)
{
@@ -849,10 +942,13 @@ void vgic_update_state(struct kvm *kvm)
}

kvm_for_each_vcpu(c, vcpu, kvm) {
- if (compute_pending_for_cpu(vcpu)) {
- pr_debug("CPU%d has pending interrupts\n", c);
+ if (compute_pending_for_cpu(vcpu))
set_bit(c, dist->irq_pending_on_cpu);
- }
+
+ if (compute_active_for_cpu(vcpu))
+ set_bit(c, dist->irq_active_on_cpu);
+ else
+ clear_bit(c, dist->irq_active_on_cpu);
}
}

@@ -952,7 +1048,12 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
int lr_nr, struct vgic_lr vlr)
{
- if (vgic_dist_irq_is_pending(vcpu, irq)) {
+ if (vgic_irq_is_active(vcpu, irq)) {
+ vlr.state |= LR_STATE_ACTIVE;
+ kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
+ vgic_irq_clear_active(vcpu, irq);
+ vgic_update_state(vcpu->kvm);
+ } else if (vgic_dist_irq_is_pending(vcpu, irq)) {
vlr.state |= LR_STATE_PENDING;
kvm_debug("Set pending: 0x%x\n", vlr.state);
}
@@ -1040,39 +1141,49 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long *pa_percpu, *pa_shared;
int i, vcpu_id;
int overflow = 0;
+ int nr_shared = vgic_nr_shared_irqs(dist);

vcpu_id = vcpu->vcpu_id;

+ pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu;
+ pa_shared = vcpu->arch.vgic_cpu.pend_act_shared;
+
+ bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu,
+ VGIC_NR_PRIVATE_IRQS);
+ bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared,
+ nr_shared);
/*
* We may not have any pending interrupt, or the interrupts
* may have been serviced from another vcpu. In all cases,
* move along.
*/
- if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
- pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
+ if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
goto epilog;
- }

/* SGIs */
- for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
+ for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) {
if (!queue_sgi(vcpu, i))
overflow = 1;
}

/* PPIs */
- for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
+ for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) {
if (!vgic_queue_hwirq(vcpu, i))
overflow = 1;
}

/* SPIs */
- for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
+ for_each_set_bit(i, pa_shared, nr_shared) {
if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
overflow = 1;
}

+
+
+
epilog:
if (overflow) {
vgic_enable_underflow(vcpu);
@@ -1219,6 +1330,17 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
}

+int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return 0;
+
+ return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
+}
+
+
void vgic_kick_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
@@ -1391,8 +1513,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

kfree(vgic_cpu->pending_shared);
+ kfree(vgic_cpu->active_shared);
+ kfree(vgic_cpu->pend_act_shared);
kfree(vgic_cpu->vgic_irq_lr_map);
vgic_cpu->pending_shared = NULL;
+ vgic_cpu->active_shared = NULL;
+ vgic_cpu->pend_act_shared = NULL;
vgic_cpu->vgic_irq_lr_map = NULL;
}

@@ -1402,9 +1528,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)

int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+ vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
+ vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);

- if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+ if (!vgic_cpu->pending_shared
+ || !vgic_cpu->active_shared
+ || !vgic_cpu->pend_act_shared
+ || !vgic_cpu->vgic_irq_lr_map) {
kvm_vgic_vcpu_destroy(vcpu);
return -ENOMEM;
}
@@ -1457,10 +1588,12 @@ void kvm_vgic_destroy(struct kvm *kvm)
kfree(dist->irq_spi_mpidr);
kfree(dist->irq_spi_target);
kfree(dist->irq_pending_on_cpu);
+ kfree(dist->irq_active_on_cpu);
dist->irq_sgi_sources = NULL;
dist->irq_spi_cpu = NULL;
dist->irq_spi_target = NULL;
dist->irq_pending_on_cpu = NULL;
+ dist->irq_active_on_cpu = NULL;
dist->nr_cpus = 0;
}

@@ -1496,6 +1629,7 @@ int vgic_init(struct kvm *kvm)
ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs);
ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);

@@ -1508,10 +1642,13 @@ int vgic_init(struct kvm *kvm)
GFP_KERNEL);
dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
GFP_KERNEL);
+ dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+ GFP_KERNEL);
if (!dist->irq_sgi_sources ||
!dist->irq_spi_cpu ||
!dist->irq_spi_target ||
- !dist->irq_pending_on_cpu) {
+ !dist->irq_pending_on_cpu ||
+ !dist->irq_active_on_cpu) {
ret = -ENOMEM;
goto out;
}
diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h
index 1e83bdf..1e5a381 100644
--- a/virt/kvm/arm/vgic.h
+++ b/virt/kvm/arm/vgic.h
@@ -107,6 +107,14 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
phys_addr_t offset, int vcpu_id);

+bool vgic_handle_set_active_reg(struct kvm *kvm,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, int vcpu_id);
+
+bool vgic_handle_clear_active_reg(struct kvm *kvm,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, int vcpu_id);
+
bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
phys_addr_t offset);

--
2.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/