Re: irq_desc use-after-free in smp_irq_move_cleanup_interrupt

From: Thomas Gleixner
Date: Wed Nov 25 2015 - 16:13:36 EST


On Wed, 25 Nov 2015, Thomas Gleixner wrote:
> So if CPU1 gets the IPI _BEFORE_ move_in_progress is set to 0, and
> does not get another IPI before the next move ..... That has been that
> way forever.
>
> Duh. Working on a real fix this time.

Here you go. Completely untested of course.

Larger than I hoped for, but the simple fix of just clearing the
move_in_progress flag before sending the IPI does not work because:

CPU0                          CPU1                          CPU2
data->move_in_progress=0
sendIPI()
                              set_affinity()
                              lock_vector()                 handle_IPI
                              move_in_progress = 1          lock_vector()
                              unlock_vector()
                                                            move_in_progress == 1
                                                            -> no cleanup

So we are back to square one. Now one might think that taking vector
lock prevents that issue:

CPU0                          CPU1                          CPU2
lock_vector()
data->move_in_progress=0
sendIPI()
unlock_vector()
                              set_affinity()
                              assign_irq_vector()
                              lock_vector()                 handle_IPI
                              move_in_progress = 1          lock_vector()
                              unlock_vector()
                                                            move_in_progress == 1
Not really.

So now the solution is:

CPU0                          CPU1                          CPU2
lock_vector()
data->move_in_progress=0
data->cleanup_mask = data->old_domain
sendIPI()
unlock_vector()
                              set_affinity()
                              assign_irq_vector()
                              lock_vector()
                              if (move_in_progress ||
                                  !empty(cleanup_mask)) {
                                      unlock_vector()
                                      return -EBUSY;        handle_IPI
                              }                             lock_vector()
                                                            move_in_progress == 0
                                                            cpu is set in cleanup mask
                                                            ->cleanup vector

Looks a bit overkill with the extra cpumask. I tried a simple counter
but that does not work versus cpu unplug as we do not know whether the
outgoing cpu is involved in the cleanup or not. And if the cpu is
involved we starve assign_irq_vector() ....

The upside of this is that we get rid of that atomic allocation in
__send_cleanup_vector().

Brain hurts by now.

Not-Yet-Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/x86/kernel/apic/vector.c | 37 ++++++++++++++++---------------------
1 file changed, 16 insertions(+), 21 deletions(-)

--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -25,6 +25,7 @@ struct apic_chip_data {
struct irq_cfg cfg;
cpumask_var_t domain;
cpumask_var_t old_domain;
+ cpumask_var_t cleanup_mask;
u8 move_in_progress : 1;
};

@@ -83,7 +84,11 @@ static struct apic_chip_data *alloc_apic
goto out_data;
if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node))
goto out_domain;
+ if (!zalloc_cpumask_var_node(&data->cleanup_mask, GFP_KERNEL, node))
+ goto out_old;
return data;
+out_old:
+ free_cpumask_var(data->old_domain);
out_domain:
free_cpumask_var(data->domain);
out_data:
@@ -96,6 +101,7 @@ static void free_apic_chip_data(struct a
if (data) {
free_cpumask_var(data->domain);
free_cpumask_var(data->old_domain);
+ free_cpumask_var(data->cleanup_mask);
kfree(data);
}
}
@@ -118,7 +124,7 @@ static int __assign_irq_vector(int irq,
static int current_offset = VECTOR_OFFSET_START % 16;
int cpu, err;

- if (d->move_in_progress)
+ if (d->move_in_progress || !cpumask_empty(d->cleanup_mask))
return -EBUSY;

/* Only try and allocate irqs on cpus that are present */
@@ -511,20 +517,11 @@ static struct irq_chip lapic_controller
#ifdef CONFIG_SMP
static void __send_cleanup_vector(struct apic_chip_data *data)
{
- cpumask_var_t cleanup_mask;
-
- if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
- unsigned int i;
-
- for_each_cpu_and(i, data->old_domain, cpu_online_mask)
- apic->send_IPI_mask(cpumask_of(i),
- IRQ_MOVE_CLEANUP_VECTOR);
- } else {
- cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
- apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- free_cpumask_var(cleanup_mask);
- }
+ raw_spin_lock(&vector_lock);
+ cpumask_and(data->cleanup_mask, data->old_domain, cpu_online_mask);
data->move_in_progress = 0;
+ apic->send_IPI_mask(data->cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ raw_spin_unlock(&vector_lock);
}

void send_cleanup_vector(struct irq_cfg *cfg)
@@ -568,14 +565,11 @@ asmlinkage __visible void smp_irq_move_c
goto unlock;

/*
- * Check if the irq migration is in progress. If so, we
- * haven't received the cleanup request yet for this irq.
+ * Nothing to cleanup if irq migration is in progress
+ * or this cpu is not set in the cleanup mask.
*/
- if (data->move_in_progress)
- goto unlock;
-
- if (vector == data->cfg.vector &&
- cpumask_test_cpu(me, data->domain))
+ if (data->move_in_progress ||
+ !cpumask_test_cpu(me, data->cleanup_mask))
goto unlock;

irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
@@ -591,6 +585,7 @@ asmlinkage __visible void smp_irq_move_c
goto unlock;
}
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+ cpumask_clear_cpu(me, data->cleanup_mask);
unlock:
raw_spin_unlock(&desc->lock);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/