[PATCH v2 0/3] Fix irq_domain vs. irq user race

From: Sverdlin, Alexander (Nokia - DE/Ulm)
Date: Fri Sep 27 2019 - 11:00:48 EST


From: Alexander Sverdlin <alexander.sverdlin@xxxxxxxxx>

Seems that the discovered race was here since adapting the PowerPC code
into genirq in 2012. I was surprized it went unnoticed all this time, but
it turns out, device registration (being it DT or ACPI) is mostly
sequential in kernel. In our case probe deferring was involved and another
independent probe(), so that the fatal sequence probably looked like
following:

CPU0 CPU1

// probe() of irq-user-device
// irq_domain not found =>
// -EPROBE_DEFER

// probe of irq_domain, // probe() of some unrelated device
// including typical in // leading to re-try of
// drivers/irqchip pattern: // irq-user-device probe(), calling
irq_domain_create() of_irq_get()
for (...) irq_create_mapping() => irq_create_mapping()
=> irq_find_mapping() => irq_find_mapping()
=> irq_domain_alloc_descs() => irq_domain_alloc_descs()
=> irq_domain_associate()
=> irq_domain_associate()

irq_domain_associate() uses irq_domain_mutex internally, this doesn't help
however, because parallel calls to irq_create_mapping() return different
virq numbers and either the irq_domain driver is not able to perform
necessary configuration or the mapping the "slave" driver has got is being
overwritten by the controller driver and the virq "slave" got is never
triggered. The test code [1] reproduces the simplified scenario, which
doesn't involve real deferred probe.

As irq_find_mapping() is used in interrupt handlers it cannot take locks.
I didn't want to change the semantics of the exported
irq_domain_associate() (to remove the mutex from it and wrap
irq_find_mapping()-irq_domain_associate() pair into this mutex). Therefore
irq_domain_associate() was modified to detect the collisions and the
callers were modified to deal with this.

The race in irq_create_fwspec_mapping() is distinct, but has the same
nature: making a decision based on the result of unprotected
irq_find_mapping() call.

Alexander Sverdlin (3):
genirq/irqdomain: Check for existing mapping in irq_domain_associate()
genirq/irqdomain: Re-check mapping after associate in
irq_create_mapping()
genirq/irqdomain: Detect type race in irq_create_fwspec_mapping()

kernel/irq/irqdomain.c | 88 +++++++++++++++++++++++++++++---------------------
1 file changed, 52 insertions(+), 36 deletions(-)

Changelog:
v2:
- EBUSY -> EEXIST
- Only try to find existing mapping if irq_domain_associate() found
one

[1] Test kernel module:

// SPDX-License-Identifier: GPL-2.0
/**********************************************************************
* Author: Alexander Sverdlin <alexander.sverdlin@xxxxxxxxx>
*
* Copyright (c) 2019 Nokia
*
* This module is intended to test the discovered race condition in
* irq_create_mapping() function. The typical use case is when
* irq_create_mapping() is being called from an IRQ controller driver
* and of_irq_get() is used in a device driver requesting an IRQ from
* the particular IRQ controller. But of_irq_get() is calling
* irq_create_mapping() internally and this is the genuine race.
**********************************************************************/

#include <linux/atomic.h>
#include <linux/irqdomain.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>

static unsigned iterations = 1000;
module_param(iterations, uint, S_IRUGO);

static struct irq_domain *domain;
static atomic_t sync_begin = ATOMIC_INIT(0);
static atomic_t sync_end = ATOMIC_INIT(0);

static int icm_thread(void *data)
{
unsigned *res = data;

while (!kthread_should_stop()) {
/* Wait until main thread signals start */
if (!atomic_read(&sync_begin))
continue;
*res = irq_create_mapping(domain, 0);
smp_mb__before_atomic();
atomic_dec(&sync_begin);
/* Wait until all test threads attepted the mapping */
while (!kthread_should_stop())
if (!atomic_read(&sync_begin))
break;
atomic_dec(&sync_end);
}
return 0;
}

static int irq_create_mapping_race_init(void)
{
const static struct irq_domain_ops ops;
struct task_struct *thr[2];
unsigned irq[ARRAY_SIZE(thr)];
unsigned i;
int ret = 0;

domain = irq_domain_create_linear(NULL, 2, &ops, NULL);
if (!domain) {
pr_err("irq_domain_create_linear() failed\n");
return -EINVAL;
}

for (i = 0; i < ARRAY_SIZE(thr); i++)
thr[i] = kthread_run(icm_thread, &irq[i], "icm_thr%u", i);

for (i = 0; i < iterations; i++) {
unsigned j;

/* Signal test threads to attempt the mapping */
atomic_add(ARRAY_SIZE(thr), &sync_end);
atomic_add(ARRAY_SIZE(thr), &sync_begin);
/* Wait until all threads have made an iteration */
while (atomic_read(&sync_end))
cpu_relax();
smp_mb__after_atomic();

for (j = 0; j < ARRAY_SIZE(thr); j++)
if (!irq[j]) {
pr_err("%s: %s got no virq\n", domain->name,
thr[j]->comm);
ret = -ENOENT;
goto stop_threads;
}

for (j = 1; j < ARRAY_SIZE(thr); j++)
if (irq[0] != irq[j]) {
pr_err("%s: %s got virq %u and %s got virq %u\n",
domain->name, thr[0]->comm, irq[0],
thr[j]->comm, irq[j]);
ret = -EINVAL;
goto stop_threads;
}

irq_dispose_mapping(irq[0]);
}

stop_threads:
for (i = 0; i < ARRAY_SIZE(thr); i++)
kthread_stop(thr[i]);

irq_domain_remove(domain);

return ret;
}
module_init(irq_create_mapping_race_init);

static void __exit irq_create_mapping_race_exit(void)
{
}
module_exit(irq_create_mapping_race_exit);

MODULE_AUTHOR("Alexander Sverdlin <alexander.sverdlin@xxxxxxxxx>");
MODULE_DESCRIPTION("irq_create_mapping() race test module");
MODULE_LICENSE("GPL");