Re: [PATCH v2] arm64: gic: increase the number of IRQ descriptors

From: Shanker Donthineni
Date: Tue Jan 10 2023 - 09:23:41 EST


Hi Marc,

On 1/10/23 02:20, Marc Zyngier wrote:
External email: Use caution opening links or attachments


On Mon, 09 Jan 2023 17:13:25 +0000,
Shanker Donthineni <sdonthineni@xxxxxxxxxx> wrote:

I'm happy to help with it, but I'm certainly not willing to accept any
sort of new compile-time limit.

Thanks for helping with a scalable solution instead of static
allocation. Please include me whenever patches posted to LKML. I'm
happy to verify on NVIDIA server platforms and provide test
feedback.


I offered to help you. I didn't offer to do the work for you! ;-)


I've looked at the IDR/IDA API. There is no suitable function for
allocating contiguous IDs to replace bitmap API.

__irq_alloc_descs():

mutex_lock(&sparse_irq_lock);

start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
from, cnt, 0);
ret = -EEXIST;

Is there any existing API that I can use for allocating contiguous IDs?

I think you should address the problem the other way around, as there
are lower hanging fruits:

- turn the irq_desc_tree radix tree into a XArray

- use the XArray mark feature to reimplement the irqs_resend bitmap

Once you have done that, you have already halved the memory usage.
To implement the allocated_irqs bitmap functionality, you have a
bunch of options:

- make the XArray an allocating XArray, and iterate over XA_FREE_MARK
to find the free range (see how the infiniband subsystem is doing
exactly that)

- use another Xarray mark to annotate the allocated IRQs, find the
distance between two allocations, and use this range if the request
fits (a poor man's variation of the above)

- use a sideband data structure such as the GICv3 LPI allocator, which
is already dealing with range allocation (I'd rather avoid that)

- something else?

Thanks for providing the guidance. The irq_resend change will be simple,
IDR will fit perfectly. Could you comment on the below 2 patches which are
using IDR API?

One IDR variable is used for both the IRQ ID allocation & descriptoirs.

I'll test and post patches for comments if you're okay with the approach.

Patch 1/2:
genirq: Prepare code for IDR based allocation

Introduce helper functions for managing Linux IRQ IDs and
define for both SPARSE_IRQ and non-SPARSE_IRQ seperately.

There is no change in functional behavior.

Changes:
-Helper function irq_alloc_descs_ids() for allocatind IRQ IDs
-Helper function irq_free_descs_ids() to free IRQ IDs
-Helper function irq_get_next_id() to get next IRQ ID

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index fd0996274401..a40ac0c58550 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -131,7 +131,6 @@ int nr_irqs = NR_IRQS;
EXPORT_SYMBOL_GPL(nr_irqs);

static DEFINE_MUTEX(sparse_irq_lock);
-static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);

#ifdef CONFIG_SPARSE_IRQ

@@ -344,6 +343,7 @@ static void irq_sysfs_del(struct irq_desc *desc) {}

#endif /* CONFIG_SYSFS */

+static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
static RADIX_TREE(irq_desc_tree, GFP_KERNEL);

static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
@@ -469,6 +469,22 @@ static void free_desc(unsigned int irq)
call_rcu(&desc->rcu, delayed_free_desc);
}

+static void irq_free_descs_ids(unsigned int from, unsigned int cnt)
+{
+ bitmap_clear(allocated_irqs, from, cnt);
+}
+
+static int irq_alloc_descs_ids(unsigned int from, unsigned int cnt)
+{
+ return bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
+ from, cnt, 0);
+}
+
+static unsigned int irq_get_next_id(unsigned int offset)
+{
+ return find_next_bit(allocated_irqs, nr_irqs, offset);
+}
+
static int alloc_descs(unsigned int start, unsigned int cnt, int node,
const struct irq_affinity_desc *affinity,
struct module *owner)
@@ -553,6 +569,8 @@ int __init early_irq_init(void)

#else /* !CONFIG_SPARSE_IRQ */

+static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
+
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS-1] = {
.handle_irq = handle_bad_irq,
@@ -591,6 +609,22 @@ struct irq_desc *irq_to_desc(unsigned int irq)
}
EXPORT_SYMBOL(irq_to_desc);

+static void irq_free_descs_ids(unsigned int from, unsigned int cnt)
+{
+ bitmap_clear(allocated_irqs, from, cnt);
+}
+
+static int irq_alloc_descs_ids(unsigned int from, unsigned int cnt)
+{
+ return bitmap_find_next_zero_area(allocated_irqs, NR_IRQS,
+ from, cnt, 0);
+}
+
+static unsigned int irq_get_next_id(unsigned int offset)
+{
+ return find_next_bit(allocated_irqs, nr_irqs, offset);
+}
+
static void free_desc(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -768,7 +802,7 @@ void irq_free_descs(unsigned int from, unsigned int cnt)
for (i = 0; i < cnt; i++)
free_desc(from + i);

- bitmap_clear(allocated_irqs, from, cnt);
+ irq_free_descs_ids(from, cnt);
mutex_unlock(&sparse_irq_lock);
}
EXPORT_SYMBOL_GPL(irq_free_descs);
@@ -810,8 +844,7 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,

mutex_lock(&sparse_irq_lock);

- start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
- from, cnt, 0);
+ start = irq_alloc_descs_ids(from, cnt);
ret = -EEXIST;
if (irq >=0 && start != irq)
goto unlock;
@@ -836,7 +869,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs);
*/
unsigned int irq_get_next_irq(unsigned int offset)
{
- return find_next_bit(allocated_irqs, nr_irqs, offset);
+ return irq_get_next_id(offset);
}


PATCH 2/2:
genirq: Use IDR API for Linux-IRQ IDs allocation

The build time config paramter IRQ_BITMAP_BITS (NR_IRQS+8196)
may not be sufficient for some architectures. The interrupt ID
sparse is huge for ARM-GIC architecture ~32 bits. Static bitmap
memory for managing IDs is not optimal when NR_IRQS is set to
a high value.

It uses the IDR API for the IRQ ID allocation/deallocation and
its descriptors management insertion/deletion/search. No other
references to macro IRQ_BITMAP_BITS hence remove it.

And also covert static allocation of the 'irqs_resend' bitmap
to dynamic allocation using IDR.

diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 5fdc0b557579..501f90962644 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -11,12 +11,6 @@
#include <linux/pm_runtime.h>
#include <linux/sched/clock.h>

-#ifdef CONFIG_SPARSE_IRQ
-# define IRQ_BITMAP_BITS (NR_IRQS + 8196)
-#else
-# define IRQ_BITMAP_BITS NR_IRQS
-#endif
-
#define istate core_internal_state__do_not_mess_with_it

extern bool noirqdebug;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a40ac0c58550..bb1febd3a420 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -343,25 +343,25 @@ static void irq_sysfs_del(struct irq_desc *desc) {}

#endif /* CONFIG_SYSFS */

-static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
-static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
+static DEFINE_IDR(idr_irq_descs);

static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
{
- radix_tree_insert(&irq_desc_tree, irq, desc);
+ idr_replace(&idr_irq_descs, desc, irq);
}

struct irq_desc *irq_to_desc(unsigned int irq)
{
- return radix_tree_lookup(&irq_desc_tree, irq);
+ return idr_find(&idr_irq_descs, irq);
}
+
#ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE
EXPORT_SYMBOL_GPL(irq_to_desc);
#endif

static void delete_irq_desc(unsigned int irq)
{
- radix_tree_delete(&irq_desc_tree, irq);
+ idr_replace(&idr_irq_descs, NULL, irq);
}

#ifdef CONFIG_SMP
@@ -471,18 +471,48 @@ static void free_desc(unsigned int irq)

static void irq_free_descs_ids(unsigned int from, unsigned int cnt)
{
- bitmap_clear(allocated_irqs, from, cnt);
+ int i;
+
+ for (i = 0; i < cnt; i++)
+ idr_remove(&idr_irq_descs, from + i);
}

static int irq_alloc_descs_ids(unsigned int from, unsigned int cnt)
{
- return bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
- from, cnt, 0);
+ int start, id, i;
+
+ do {
+ /* Allocate starting ID */
+ start = idr_alloc(&idr_irq_descs, NULL, from, 0, GFP_ATOMIC);
+ if (start < 0)
+ return start;
+ idr_set_cursor(&idr_irq_descs, start);
+
+ /* Allocate contiguous IDs */
+ for (i = 1; i < cnt; i++) {
+ id = idr_alloc_cyclic(&idr_irq_descs, NULL, start + i,
+ start + i, GFP_ATOMIC);
+ if (id < 0) {
+ irq_free_descs_ids(from, i);
+ break;
+ }
+ }
+
+ /* Allocated 'cnt' IDs */
+ if (i == cnt)
+ return start;
+ from = idr_get_cursor(&idr_irq_descs);
+ } while (from < INT_MAX);
+
+ irq_free_descs_ids(start, i);
+ return -ENOSPC;
}

static unsigned int irq_get_next_id(unsigned int offset)
{
- return find_next_bit(allocated_irqs, nr_irqs, offset);
+ int id;
+
+ return idr_get_next(&idr_irqs, &id) ? id : -EINVAL;
}

static int alloc_descs(unsigned int start, unsigned int cnt, int node,
@@ -521,7 +551,6 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
irq_sysfs_add(start + i, desc);
irq_add_debugfs_entry(start + i, desc);
}
- bitmap_set(allocated_irqs, start, cnt);
return start;

err:
@@ -532,8 +561,6 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,

static int irq_expand_nr_irqs(unsigned int nr)
{
- if (nr > IRQ_BITMAP_BITS)
- return -ENOMEM;
nr_irqs = nr;
return 0;
}
@@ -542,6 +569,7 @@ int __init early_irq_init(void)
{
int i, initcnt, node = first_online_node;
struct irq_desc *desc;
+ int irq;
init_irq_default_affinity();

@@ -550,19 +578,10 @@ int __init early_irq_init(void)
printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n",
NR_IRQS, nr_irqs, initcnt);

- if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS))
- nr_irqs = IRQ_BITMAP_BITS;
-
- if (WARN_ON(initcnt > IRQ_BITMAP_BITS))
- initcnt = IRQ_BITMAP_BITS;
-
- if (initcnt > nr_irqs)
- nr_irqs = initcnt;
-
for (i = 0; i < initcnt; i++) {
- desc = alloc_desc(i, node, 0, NULL, NULL);
- set_bit(i, allocated_irqs);
- irq_insert_desc(i, desc);
+ irq = irq_alloc_descs_ids(0, 1);
+ desc = alloc_desc(irq, node, 0, NULL, NULL);
+ irq_insert_desc(irq, desc);
}
return arch_early_irq_init();
}
@@ -855,6 +874,8 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
goto unlock;
}
ret = alloc_descs(start, cnt, node, affinity, owner);
+ if (ret != start)
+ irq_free_descs_ids(start, cnt);
unlock:
mutex_unlock(&sparse_irq_lock);
return ret;
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 0c46e9fe3a89..1c9db8e03fba 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -21,8 +21,8 @@

#ifdef CONFIG_HARDIRQS_SW_RESEND

-/* Bitmap to handle software resend of interrupts: */
-static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
+/* IDR map to handle software resend of interrupts: */
+static DEFINE_IDR(irqs_resend);

/*
* Run software resends of IRQ's
@@ -30,14 +30,11 @@ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
static void resend_irqs(struct tasklet_struct *unused)
{
struct irq_desc *desc;
- int irq;
-
- while (!bitmap_empty(irqs_resend, nr_irqs)) {
- irq = find_first_bit(irqs_resend, nr_irqs);
- clear_bit(irq, irqs_resend);
- desc = irq_to_desc(irq);
- if (!desc)
- continue;
+ int id;
+
+ idr_for_each_entry(&irqs_resend, desc, id) {
+ idr_replace(&irqs_resend, NULL, id);
+ idr_remove(&irqs_resend, id);
local_irq_disable();
desc->handle_irq(desc);
local_irq_enable();
@@ -49,7 +46,7 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs);

static int irq_sw_resend(struct irq_desc *desc)
{
- unsigned int irq = irq_desc_get_irq(desc);
+ int id;
/*
* Validate whether this interrupt can be safely injected from
@@ -70,11 +67,13 @@ static int irq_sw_resend(struct irq_desc *desc)
*/
if (!desc->parent_irq)
return -EINVAL;
- irq = desc->parent_irq;
}

/* Set it pending and activate the softirq: */
- set_bit(irq, irqs_resend);
+ id = idr_alloc(&irqs_resend, desc, 0, 0, GFP_ATOMIC);
+ if (id < 0)
+ return id;
+
tasklet_schedule(&resend_tasklet);
return 0;
}