Re: [PATCH 2/2] Loongarch:Support loongarch avec

From: Thomas Gleixner
Date: Wed May 08 2024 - 05:43:26 EST

Next message: Alexey Charkov: "Re: [PATCH v4 6/6] arm64: dts: rockchip: Add OPP data for CPU cores on RK3588"
Previous message: Hou Tao: "Re: WARNING in fuse_request_end"
In reply to: Tianyang Zhang: "Re: [PATCH 2/2] Loongarch:Support loongarch avec"
Next in thread: Tianyang Zhang: "Re: [PATCH 2/2] Loongarch:Support loongarch avec"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On Tue, May 07 2024 at 20:59, Tianyang Zhang wrote:
> From: zhangtianyang <zhangtianyang@xxxxxxxxxxx>

Please use your real name for the From line.

> +
> +#include <linux/init.h>
> +#include <linux/interrupt.h>
> +#include <linux/kernel.h>
> +#include <linux/irq.h>
> +#include <linux/irqchip.h>
> +#include <linux/irqdomain.h>
> +#include <linux/spinlock.h>
> +#include <linux/msi.h>
> +#include <linux/irqchip/chained_irq.h>
> +#include <linux/cpuhotplug.h>
> +#include <linux/radix-tree.h>

Please put the includes in alphabetical order.

> +#include <asm/loongarch.h>
> +#include <asm/setup.h>
> +
> +static phys_addr_t msi_base_v2;
> +
> +typedef struct irq_data *irq_map_t[NR_VECTORS];

No new pointless typedefs please.

struct irq_map {
struct irq_data irqd[NR_VECTORS];
};

> +DECLARE_PER_CPU(irq_map_t, irq_map);

What's this declaration for?

> +DEFINE_PER_CPU(irq_map_t, irq_map) = {

Why is this global and not static?

> + [0 ... NR_VECTORS - 1] = NULL,

No need to initialize to NULL. It's zeroed by default.

> +};
> +
> +struct pending_list {
> + struct list_head head;
> + raw_spinlock_t lock;
> +};

https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#struct-declarations-and-initializers

> +DEFINE_PER_CPU(struct pending_list, pending_list);

Why is this global?

> +struct loongarch_avec_chip {
> + struct fwnode_handle *fwnode;
> + struct irq_domain *domain;
> + struct irq_matrix *vector_matrix;
> + raw_spinlock_t lock;
> +} loongarch_avec;
> +
> +struct loongarch_avec_data {
> + struct list_head entry;
> + unsigned int cpu;
> + unsigned int vec;
> + unsigned int prev_cpu;
> + unsigned int prev_vec;
> +};

See link above.

> +static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest,
> + unsigned int *cpu, int *vector)

Please read the line break section in the tip documentation.

> +{
> + int ret;
> +
> + ret = irq_matrix_alloc(loongarch_avec.vector_matrix, dest, false, cpu);
> + if (ret < 0)
> + return ret;
> + *vector = ret;
> +
> + return 0;

Why not simply return the result of irq_matrix_alloc() and check it for
a negative value at the call site? If it is not negative then use it as
the vector. That spares the whole indirection and makes the code readable.

> +static void loongarch_avec_sync(struct loongarch_avec_data *adata)
> +{
> + struct loongarch_avec_data *data;
> + struct pending_list *plist;
> +
> + if (cpu_online(adata->prev_cpu)) {
> + plist = per_cpu_ptr(&pending_list, adata->prev_cpu);
> +
> + data = kmalloc(sizeof(struct loongarch_avec_data), GFP_KERNEL);

This is called from loongarch_avec_set_affinity() with raw spinlocks
held and interrupts disabled. So GFP_KERNEL cannot work. You clearly did
not test that code with proper debug options enabled.

But even GFP_ATOMIC won't work when you want to support PREEMPT_RT as
that does not allow allocations in such contexts.

Look how x86 solves exactly this problem without allocations required.

> + if (!data) {
> + pr_warn("NO space for clean data\n");
> + return;
> + }
> + memcpy(data, adata, sizeof(struct loongarch_avec_data));
> + INIT_LIST_HEAD(&data->entry);
> +
> + list_add_tail(&data->entry, &plist->head);
> + loongson_send_ipi_single(adata->prev_cpu, SMP_CLEAR_VECT);
> + }
> + adata->prev_cpu = adata->cpu;
> + adata->prev_vec = adata->vec;
> +}
> +
> +static int loongarch_avec_set_affinity(struct irq_data *data,
> + const struct cpumask *dest, bool force)
> +{
> + struct cpumask intersect_mask;

No cpumasks on stack please. You can make that static as usage is always
serialized via loongarch_avec.lock

> + struct loongarch_avec_data *adata;
> + unsigned int cpu, vector;
> + unsigned long flags;
> + int ret = 0;
> +
> + raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
> + adata = irq_data_get_irq_chip_data(data);
> +
> + if (adata->vec && cpu_online(adata->cpu)
> + && cpumask_test_cpu(adata->cpu, dest)) {

Please align the condition properly when you need a line break:

if (adata->vec && cpu_online(adata->cpu) &&
cpumask_test_cpu(adata->cpu, dest)) {

But you don't need a line break here because

if (adata->vec && cpu_online(adata->cpu) && cpumask_test_cpu(adata->cpu, dest)) {

fits into the 100 character line width limit.

> + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> + return 0;
> + }
> +
> + if (!cpumask_intersects(dest, cpu_online_mask)) {
> + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> + return -EINVAL;
> + }
> +
> + cpumask_and(&intersect_mask, dest, cpu_online_mask);

The above intersect check is pointless as the matrix allocator already
checks the cpumask and returns -EINVAL if empty.

> +
> + ret = assign_irq_vector(data, &intersect_mask, &cpu, &vector);
> + if (ret) {
> + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> + return ret;
> + }

> +void complete_irq_moving(int *restart)

What is the 'restart' argument for?

> +{
> + struct pending_list *plist = this_cpu_ptr(&pending_list);
> + struct loongarch_avec_data *adata, *tmp;
> + int cpu, vector;
> + u32 bias;
> + u64 irr;
> +
> + raw_spin_lock(&loongarch_avec.lock);
> +
> + list_for_each_entry_safe(adata, tmp, &plist->head, entry) {
> +
> + cpu = adata->prev_cpu;
> + vector = adata->prev_vec;
> + bias = vector/64;
> +
> + switch (bias) {
> + case 0x0:
> + irr = csr_read64(LOONGARCH_CSR_IRR0);
> + break;
> + case 0x1:
> + irr = csr_read64(LOONGARCH_CSR_IRR1);
> + break;
> + case 0x2:
> + irr = csr_read64(LOONGARCH_CSR_IRR2);
> + break;
> + case 0x3:
> + irr = csr_read64(LOONGARCH_CSR_IRR3);
> + break;
> + default:
> + return;

How can that happen ?

> + }

irr = csr_read64(LOONGARCH_CSR_IRR0 + vector / 64);

should be good enough, no?

Also please use a proper constant instead of '64', e.g. VECTORS_PER_IRR

> +
> + if (irr & (1UL << (vector % 64))) {
> + loongson_send_ipi_single(cpu, SMP_CLEAR_VECT);

So this sends an IPI to the current CPU. What guarantees that the
pending interrupt is handled _before_ the IPI is handled again?

> + continue;
> + }
> + list_del(&adata->entry);
> + irq_matrix_free(loongarch_avec.vector_matrix, cpu, vector, false);
> + this_cpu_ptr(irq_map)[vector] = 0;

s/0/NULL/ as this writes a pointer.

this_cpu_write(irq_map.irqd[vector], NULL);

avoids the whole pointer indirection.

> + kfree(adata);

Again this won't work with PREEMPT_RT.

> + }
> + raw_spin_unlock(&loongarch_avec.lock);
> +}
> +
> +static void loongarch_avec_dispatch(struct irq_desc *desc)
> +{
> + struct irq_chip *chip = irq_desc_get_chip(desc);
> + struct irq_data *d;
> + unsigned long vector;
> +
> + chained_irq_enter(chip, desc);
> + vector = csr_read64(LOONGARCH_CSR_ILR);
> + if (vector & 0x80000000)

No magic numbers. Please use proper constant defines.

> + return;
> +
> + vector &= 0xff;

Ditto.

> +
> + d = raw_cpu_ptr(irq_map)[vector];

Why raw?

d = __this_cpu_read(...);

Also, what is the point of storing irqdata in the irq_map if the only
thing you use is d->irq. You can simply store the interrupt number, no?

If you want to spare cycles for the lookup, then you want to store the
interrupt descriptor like x86 does.

> + if (d)
> + generic_handle_irq(d->irq);
> + else
> + pr_warn("IRQ ERROR:Unexpected irq occur on cpu %d[vector %d]\n",
> + smp_processor_id(), vector);

See bracket rules in the tip documentation.

> + chained_irq_exit(chip, desc);
> +}
> +
> +static int loongarch_avec_alloc(struct irq_domain *domain, unsigned int virq,
> + unsigned int nr_irqs, void *arg)
> +{
> + struct loongarch_avec_data *adata;
> + struct irq_data *irqd;
> + unsigned int cpu, vector;
> + unsigned long flags;
> + int i, err;

See variable declaration rules in the tip documentation

> + raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
> + for (i = 0; i < nr_irqs; i++) {
> + irqd = irq_domain_get_irq_data(domain, virq + i);
> + adata = kzalloc(sizeof(*adata), GFP_KERNEL);
> + if (!adata) {
> + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> + return -ENOMEM;
> + }
> + err = assign_irq_vector(irqd, cpu_online_mask, &cpu, &vector);
> + if (err) {
> + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> + return err;
> + }
> + adata->prev_cpu = adata->cpu = cpu;
> + adata->prev_vec = adata->vec = vector;
> +
> + per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irqd;

This needs to be set last, no?

> + irq_domain_set_info(domain, virq + i, virq, &loongarch_avec_controller,
> + adata, handle_edge_irq, NULL, NULL);
> + irqd_set_single_target(irqd);
> + irqd_set_affinity_on_activate(irqd);
> + }
> + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> +
> + return err;
> +}
> +
> +static void loongarch_avec_free(struct irq_domain *domain, unsigned int virq,
> + unsigned int nr_irqs)
> +{
> + struct loongarch_avec_data *adata;
> + struct irq_data *d;
> + unsigned long flags;
> + unsigned int i;
> +
> + raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
> + for (i = 0; i < nr_irqs; i++) {
> + d = irq_domain_get_irq_data(domain, virq + i);
> + adata = irq_data_get_irq_chip_data(d);
> + if (d) {
> + irq_matrix_free(loongarch_avec.vector_matrix,
> + adata->cpu,
> + adata->vec, false);
> + irq_domain_reset_irq_data(d);
> + }

What cleans up the irq_map and pending cleanups? There is a UAF waiting
around the corner.

> + }
> +
> + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> +}

> +static int __init loongarch_avec_init(struct irq_domain *parent)
> +{
> + int ret = 0, parent_irq;
> + unsigned long tmp;
> +
> + tmp = iocsr_read64(LOONGARCH_IOCSR_MISC_FUNC);
> + tmp |= IOCSR_MISC_FUNC_AVEC_EN;
> + iocsr_write64(tmp, LOONGARCH_IOCSR_MISC_FUNC);

Enabling AVEC _before_ everything is set up is a patently bad idea.

> + raw_spin_lock_init(&loongarch_avec.lock);
> +
> + loongarch_avec.fwnode = irq_domain_alloc_named_fwnode("CORE_AVEC");
> + if (!loongarch_avec.fwnode) {
> + pr_err("Unable to allocate domain handle\n");
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + loongarch_avec.domain = irq_domain_create_tree(loongarch_avec.fwnode,
> + &loongarch_avec_domain_ops, NULL);
> + if (!loongarch_avec.domain) {
> + pr_err("core-vec: cannot create IRQ domain\n");
> + ret = -ENOMEM;
> + goto out_free_handle;
> + }
> +
> + parent_irq = irq_create_mapping(parent, INT_AVEC);
> + if (!parent_irq) {
> + pr_err("Failed to mapping hwirq\n");
> + ret = -EINVAL;
> + goto out_remove_domain;
> + }
> + irq_set_chained_handler_and_data(parent_irq, loongarch_avec_dispatch, NULL);
> +
> + ret = irq_matrix_init();
> + if (ret) {
> + pr_err("Failed to init irq matrix\n");
> + goto out_free_matrix;
> + }
> +
> + return ret;
> +
> +out_free_matrix:
> + kfree(loongarch_avec.vector_matrix);
> +out_remove_domain:
> + irq_domain_remove(loongarch_avec.domain);
> +out_free_handle:
> + irq_domain_free_fwnode(loongarch_avec.fwnode);
> +out:
> + return ret;
> +}
> +
> +static int loongarch_avec_offline_cpu(unsigned int cpu)
> +{
> + unsigned long flags;
> + struct pending_list *plist = per_cpu_ptr(&pending_list, cpu);
> +
> + raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
> + if (list_empty(&plist->head)) {
> + irq_matrix_offline(loongarch_avec.vector_matrix);
> + } else {
> + pr_warn("cpu %d advanced extioi is busy\n");
> + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> + return -EBUSY;
> + }
> + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> + return 0;
> +}
> +
> +static int loongarch_avec_online_cpu(unsigned int cpu)
> +{
> + struct pending_list *plist = per_cpu_ptr(&pending_list, cpu);
> + unsigned long flags;
> +
> + raw_spin_lock_irqsave(&loongarch_avec.lock, flags);
> +
> + irq_matrix_online(loongarch_avec.vector_matrix);
> +
> + INIT_LIST_HEAD(&plist->head);
> +
> + raw_spin_unlock_irqrestore(&loongarch_avec.lock, flags);
> + return 0;
> +}
> +#if defined(CONFIG_ACPI)

Missing newline before #if and also please use #ifdef CONFIG_ACPI

> +static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
> + const unsigned long end)
> +{
> + struct acpi_madt_msi_pic *pchmsi_entry = (struct acpi_madt_msi_pic *)header;
> +
> + msi_base_v2 = pchmsi_entry->msg_address;
> + return pch_msi_acpi_init_v2(loongarch_avec.domain, pchmsi_entry);
> +}
> +
> +static inline int __init acpi_cascade_irqdomain_init(void)
> +{
> + return acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1);
> +}
> +
> +int __init loongarch_avec_acpi_init(struct irq_domain *parent)
> +{
> + int ret = 0;
> +
> + ret = loongarch_avec_init(parent);
> + if (ret) {
> + pr_err("Failed to init irq domain\n");
> + return ret;
> + }
> +
> + ret = acpi_cascade_irqdomain_init();
> + if (ret) {
> + pr_err("Failed to cascade IRQ domain\n");
> + return ret;
> + }
> +
> + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> + "loongarch_avec:online",
> + loongarch_avec_online_cpu, loongarch_avec_offline_cpu);

You cannot online/offline the matrix and handle possibly pending
cleanups from a CPUHP_AP_ONLINE_DYN state. That needs to happen in the
STARTING section between CPUHP_AP_OFFLINE and CPUHP_AP_ONLINE.

> + if (ret < 0) {
> + pr_err("loongarch_avec: failed to register hotplug callbacks.\n");
> + return ret;
> + }
> +
> + return ret;

So if CONFIG_ACPI is disabled then loongarch_avec_init() is unused and
results in a defined but not used build warning...

> diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c
> index 6e1e1f011bb2..d1706080b4f4 100644
> --- a/drivers/irqchip/irq-loongson-pch-msi.c
> +++ b/drivers/irqchip/irq-loongson-pch-msi.c
> @@ -16,7 +16,6 @@
> #include <linux/slab.h>
>
> static int nr_pics;
> -
> struct pch_msi_data {
> struct mutex msi_map_lock;
> phys_addr_t doorbell;
> @@ -100,6 +99,17 @@ static struct irq_chip middle_irq_chip = {
> .irq_compose_msi_msg = pch_msi_compose_msi_msg,
> };
>
> +static struct irq_chip pch_msi_irq_chip_v2 = {
> + .name = "MSI",
> + .irq_ack = irq_chip_ack_parent,
> +};
> +
> +static struct msi_domain_info pch_msi_domain_info_v2 = {
> + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
> + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
> + .chip = &pch_msi_irq_chip_v2,
> +};
> +
> static int pch_msi_parent_domain_alloc(struct irq_domain *domain,
> unsigned int virq, int hwirq)
> {
> @@ -268,6 +278,9 @@ struct fwnode_handle *get_pch_msi_handle(int pci_segment)
> {
> int i;
>
> + if (cpu_has_avecint)
> + return pch_msi_handle[0];
> +
> for (i = 0; i < MAX_IO_PICS; i++) {
> if (msi_group[i].pci_segment == pci_segment)
> return pch_msi_handle[i];
> @@ -289,4 +302,34 @@ int __init pch_msi_acpi_init(struct irq_domain *parent,
>
> return ret;
> }
> +
> +int __init pch_msi_acpi_init_v2(struct irq_domain *parent,
> + struct acpi_madt_msi_pic *msi_entry)
> +{
> + struct irq_domain *msi_domain;
> +
> + if (pch_msi_handle[0])
> + return 0;
> +
> + pch_msi_handle[0] = irq_domain_alloc_named_fwnode("msipic-v2");
> + if (!pch_msi_handle[0]) {
> + pr_err("Unable to allocate domain handle\n");
> + kfree(pch_msi_handle[0]);
> + return -ENOMEM;
> + }
> +
> + msi_domain = pci_msi_create_irq_domain(pch_msi_handle[0],
> + &pch_msi_domain_info_v2,
> + parent);
> + if (!msi_domain) {
> + pr_err("Failed to create PCI MSI domain\n");
> + kfree(pch_msi_handle[0]);
> + return -ENOMEM;
> + }
> +
> + pr_info("IRQ domain MSIPIC-V2 init done.\n");
> + return 0;
> +}
> +
> +

Stray newlines. But as with the other CONFIG_ACPI part above a build
with CONFIG_ACPI=n will result in defined but not used warnings ....

Thanks,

tglx

Next message: Alexey Charkov: "Re: [PATCH v4 6/6] arm64: dts: rockchip: Add OPP data for CPU cores on RK3588"
Previous message: Hou Tao: "Re: WARNING in fuse_request_end"
In reply to: Tianyang Zhang: "Re: [PATCH 2/2] Loongarch:Support loongarch avec"
Next in thread: Tianyang Zhang: "Re: [PATCH 2/2] Loongarch:Support loongarch avec"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]