[PATCH] genirq: Own affinity hint

From: Jakub Sitnicki
Date: Wed Oct 25 2023 - 10:15:30 EST


Interfaces for setting affinity hint [1] have a rather surprising contract
with the caller [2]. The cpumask memory passed as a hint must be kept
alive, and stay unchanged, until the affinity hint gets updated again or
cleared. Otherwise reading out /proc/irq/N/affinity_hint, which
dereferences the pointer to the cpumask holding the hint, will access
potentially invalid memory.

This forces callers to allocate the affinity hint on heap and mange its
lifetime. That is unless they fall into the category of cpumask helpers -
get_cpu_mask, cpu_mask_of[_node] - which produce a pointer to a mask
constant.

Change the affinity hint interfaces to make a copy of the affinity
hint. This way the operation from the caller point of view becomes a "set
and forget." Also there is no catch any more that the passed cpumask can't
be allocated on stack [3].

No updates are needed to the existing call sites right away. They can be
adapted to simplify resource management on their side on their own
schedule.

[1] commit 65c7cdedeb30 ("genirq: Provide new interfaces for affinity hints")
[2] commit 18a048370b06 ("net: mana: Fix accessing freed irq affinity_hint")
[3] commit 8deec94c6040 ("net: stmmac: set IRQ affinity hint for multi MSI vectors")

Cc: Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx>
Signed-off-by: Jakub Sitnicki <jakub@xxxxxxxxxxxxxx>
---
include/linux/interrupt.h | 4 ++++
include/linux/irq.h | 1 +
kernel/irq/irqdesc.c | 32 ++++++++++++++++++++------------
kernel/irq/manage.c | 11 ++++-------
kernel/irq/proc.c | 22 ++++++----------------
5 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 4a1dc88ddbff..14ea8d2a39a5 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -320,6 +320,8 @@ extern int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m,
* @m: cpumask pointer (NULL to clear the hint)
*
* Updates the affinity hint, but does not change the affinity of the interrupt.
+ *
+ * Memory pointed by @m is not accessed after the call returns.
*/
static inline int
irq_update_affinity_hint(unsigned int irq, const struct cpumask *m)
@@ -335,6 +337,8 @@ irq_update_affinity_hint(unsigned int irq, const struct cpumask *m)
*
* Updates the affinity hint and if @m is not NULL it applies it as the
* affinity of that interrupt.
+ *
+ * Memory pointed by @m is not accessed after the call returns.
*/
static inline int
irq_set_affinity_and_hint(unsigned int irq, const struct cpumask *m)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index d8a6fdce9373..329c7c7be5a1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -153,6 +153,7 @@ struct irq_common_data {
struct msi_desc *msi_desc;
#ifdef CONFIG_SMP
cpumask_var_t affinity;
+ cpumask_var_t affinity_hint;
#endif
#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
cpumask_var_t effective_affinity;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 27ca1c866f29..fb443b702f22 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -55,26 +55,33 @@ static int alloc_masks(struct irq_desc *desc, int node)
{
if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity,
GFP_KERNEL, node))
- return -ENOMEM;
+ goto err_affinity;
+ if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity_hint,
+ GFP_KERNEL, node))
+ goto err_affinity_hint;

#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
if (!zalloc_cpumask_var_node(&desc->irq_common_data.effective_affinity,
- GFP_KERNEL, node)) {
- free_cpumask_var(desc->irq_common_data.affinity);
- return -ENOMEM;
- }
+ GFP_KERNEL, node))
+ goto err_effective_affinity;
#endif

#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (!zalloc_cpumask_var_node(&desc->pending_mask, GFP_KERNEL, node)) {
-#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
- free_cpumask_var(desc->irq_common_data.effective_affinity);
-#endif
- free_cpumask_var(desc->irq_common_data.affinity);
- return -ENOMEM;
- }
+ if (!zalloc_cpumask_var_node(&desc->pending_mask, GFP_KERNEL, node))
+ goto err_pending_mask;
#endif
return 0;
+
+err_pending_mask:
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
+err_effective_affinity:
+ free_cpumask_var(desc->irq_common_data.affinity_hint);
+err_affinity_hint:
+ free_cpumask_var(desc->irq_common_data.affinity);
+err_affinity:
+ return -ENOMEM;
}

static void desc_smp_init(struct irq_desc *desc, int node,
@@ -391,6 +398,7 @@ static void free_masks(struct irq_desc *desc)
free_cpumask_var(desc->pending_mask);
#endif
free_cpumask_var(desc->irq_common_data.affinity);
+ free_cpumask_var(desc->irq_common_data.affinity_hint);
#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
free_cpumask_var(desc->irq_common_data.effective_affinity);
#endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d309ba84e08a..573560645add 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -506,7 +506,10 @@ int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m,

if (!desc)
return -EINVAL;
- desc->affinity_hint = m;
+ if (m)
+ cpumask_copy(desc->irq_common_data.affinity_hint, m);
+ else
+ cpumask_clear(desc->irq_common_data.affinity_hint);
irq_put_desc_unlock(desc, flags);
if (m && setaffinity)
__irq_set_affinity(irq, m, false);
@@ -1916,12 +1919,6 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
irq_shutdown(desc);
}

-#ifdef CONFIG_SMP
- /* make sure affinity_hint is cleaned up */
- if (WARN_ON_ONCE(desc->affinity_hint))
- desc->affinity_hint = NULL;
-#endif
-
raw_spin_unlock_irqrestore(&desc->lock, flags);
/*
* Drop bus_lock here so the changes which were done in the chip
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 623b8136e9af..733324bbfb91 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -39,6 +39,7 @@ static struct proc_dir_entry *root_irq_dir;
enum {
AFFINITY,
AFFINITY_LIST,
+ AFFINITY_HINT,
EFFECTIVE,
EFFECTIVE_LIST,
};
@@ -57,6 +58,9 @@ static int show_irq_affinity(int type, struct seq_file *m)
mask = desc->pending_mask;
#endif
break;
+ case AFFINITY_HINT:
+ mask = desc->irq_common_data.affinity_hint;
+ break;
case EFFECTIVE:
case EFFECTIVE_LIST:
#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
@@ -73,6 +77,7 @@ static int show_irq_affinity(int type, struct seq_file *m)
seq_printf(m, "%*pbl\n", cpumask_pr_args(mask));
break;
case AFFINITY:
+ case AFFINITY_HINT:
case EFFECTIVE:
seq_printf(m, "%*pb\n", cpumask_pr_args(mask));
break;
@@ -82,22 +87,7 @@ static int show_irq_affinity(int type, struct seq_file *m)

static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
{
- struct irq_desc *desc = irq_to_desc((long)m->private);
- unsigned long flags;
- cpumask_var_t mask;
-
- if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
- return -ENOMEM;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- if (desc->affinity_hint)
- cpumask_copy(mask, desc->affinity_hint);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
- seq_printf(m, "%*pb\n", cpumask_pr_args(mask));
- free_cpumask_var(mask);
-
- return 0;
+ return show_irq_affinity(AFFINITY_HINT, m);
}

int no_irq_affinity;
--
2.41.0