Re: [PATCH 6/7] iommu/vt-d: Add IOMMU perfmon overflow handler support

From: Liang, Kan
Date: Mon Jan 16 2023 - 10:23:01 EST




On 2023-01-14 6:05 a.m., Baolu Lu wrote:
> On 2023/1/12 4:25, kan.liang@xxxxxxxxxxxxxxx wrote:
>> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>>
>> While enabled to count events and an event occurrence causes the counter
>> value to increment and roll over to or past zero, this is termed a
>> counter overflow. The overflow can trigger an interrupt. The IOMMU
>> perfmon needs to handle the case properly.
>>
>> New HW IRQs are allocated for each IOMMU device for perfmon. The IRQ IDs
>> are after the SVM range.
>>
>> In the overflow handler, the counter is not frozen. It's very unlikely
>> that the same counter overflows again during the period. But it's
>> possible that other counters overflow at the same time. Read the
>> overflow register at the end of the handler and check whether there are
>> more.
>>
>> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>> ---
>>   drivers/iommu/intel/dmar.c    |  2 +
>>   drivers/iommu/intel/iommu.h   | 11 ++++-
>>   drivers/iommu/intel/perfmon.c | 82 +++++++++++++++++++++++++++++++++++
>>   drivers/iommu/intel/svm.c     |  2 +-
>>   4 files changed, 95 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
>> index dcafa32875c1..94e314320cd9 100644
>> --- a/drivers/iommu/intel/dmar.c
>> +++ b/drivers/iommu/intel/dmar.c
>> @@ -1887,6 +1887,8 @@ static inline int dmar_msi_reg(struct
>> intel_iommu *iommu, int irq)
>>           return DMAR_FECTL_REG;
>>       else if (iommu->pr_irq == irq)
>>           return DMAR_PECTL_REG;
>> +    else if (iommu->perf_irq == irq)
>> +        return DMAR_PERFINTRCTL_REG;
>>       else
>>           BUG();
>>   }
>> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
>> index bbc5dda903e9..f616e4f3d765 100644
>> --- a/drivers/iommu/intel/iommu.h
>> +++ b/drivers/iommu/intel/iommu.h
>> @@ -130,6 +130,8 @@
>>   #define DMAR_PERFCFGOFF_REG    0x310
>>   #define DMAR_PERFOVFOFF_REG    0x318
>>   #define DMAR_PERFCNTROFF_REG    0x31c
>> +#define DMAR_PERFINTRSTS_REG    0x324
>> +#define DMAR_PERFINTRCTL_REG    0x328
>>   #define DMAR_PERFEVNTCAP_REG    0x380
>>   #define DMAR_ECMD_REG        0x400
>>   #define DMAR_ECEO_REG        0x408
>> @@ -357,6 +359,9 @@
>>     #define DMA_VCS_PAS    ((u64)1)
>>   +/* PERFINTRSTS_REG */
>> +#define DMA_PERFINTRSTS_PIS    ((u32)1)
>> +
>>   #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts)            \
>>   do {                                    \
>>       cycles_t start_time = get_cycles();                \
>> @@ -630,8 +635,12 @@ struct iommu_pmu {
>>       struct pmu        pmu;
>>       DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX);
>>       struct perf_event    *event_list[IOMMU_PMU_IDX_MAX];
>> +    unsigned char        irq_name[16];
>>   };
>>   +#define IOMMU_IRQ_ID_OFFSET_PRQ        (DMAR_UNITS_SUPPORTED)
>> +#define IOMMU_IRQ_ID_OFFSET_PERF    (2 * DMAR_UNITS_SUPPORTED)
>> +
>>   struct intel_iommu {
>>       void __iomem    *reg; /* Pointer to hardware regs, virtual addr */
>>       u64         reg_phys; /* physical address of hw register set */
>> @@ -645,7 +654,7 @@ struct intel_iommu {
>>       int        seq_id;    /* sequence id of the iommu */
>>       int        agaw; /* agaw of this iommu */
>>       int        msagaw; /* max sagaw of this iommu */
>> -    unsigned int     irq, pr_irq;
>> +    unsigned int    irq, pr_irq, perf_irq;
>>       u16        segment;     /* PCI segment# */
>>       unsigned char     name[13];    /* Device Name */
>>   diff --git a/drivers/iommu/intel/perfmon.c
>> b/drivers/iommu/intel/perfmon.c
>> index f332232bb345..d0fbf784c827 100644
>> --- a/drivers/iommu/intel/perfmon.c
>> +++ b/drivers/iommu/intel/perfmon.c
>> @@ -476,6 +476,49 @@ static void iommu_pmu_disable(struct pmu *pmu)
>>       ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, false, 0);
>>   }
>>   +static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu)
>> +{
>> +    struct perf_event *event;
>> +    int i, handled = 0;
>> +    u64 status;
>> +
>> +    /*
>> +     * Two counters may be overflowed very close.
>> +     * Always check whether there are more to handle.
>> +     */
>
> Keep comment style consistent, like this
>
>         /*
>          * Two counters may be overflowed very close. Always check whether
>          * there are more to handle.
>          */
>
> Same to other places.

Sure.

>
>> +    while ((status = dmar_readq(iommu_pmu->overflow))) {
>
> Unnecessary double brackets?
>
>> +        for_each_set_bit(i, (unsigned long *)&status,
>> iommu_pmu->num_cntr) {
>> +            handled++;
>
> "handled" isn't used anywhere. Please cleanup it.
>

Sure.

>> +
>> +            /*
>> +             * Find the assigned event of the counter.
>> +             * Accumulate the value into the event->count.
>> +             */
>> +            event = iommu_pmu->event_list[i];
>> +            if (WARN_ON_ONCE(!event))
>
> Again, do we need a kernel trace here? This is only because the hardware
> reported an wrong event id, right? How about a pr_warn() to let the user
> know this?

The hardware reported ID doesn't match the SW records. It's hard to say
which one is correct. But I guess pr_warn_once() may be enough. I will
change it in V2.

Thanks,
Kan

>
>> +                continue;
>> +            iommu_pmu_event_update(event);
>> +        }
>> +
>> +        dmar_writeq(iommu_pmu->overflow, status);
>> +    }
>> +}
>> +
>> +static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id)
>> +{
>> +    struct intel_iommu *iommu = dev_id;
>> +
>> +    if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG))
>> +        return IRQ_NONE;
>> +
>> +    iommu_pmu_counter_overflow(iommu->pmu);
>> +
>> +    /* Clear the status bit */
>> +    dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS);
>> +
>> +    return IRQ_HANDLED;
>> +}
>> +
>>   static int __iommu_pmu_register(struct intel_iommu *iommu)
>>   {
>>       struct iommu_pmu *iommu_pmu = iommu->pmu;
>> @@ -658,6 +701,38 @@ void free_iommu_pmu(struct intel_iommu *iommu)
>>       iommu->pmu = NULL;
>>   }
>>   +static int iommu_pmu_set_interrupt(struct intel_iommu *iommu)
>> +{
>> +    struct iommu_pmu *iommu_pmu = iommu->pmu;
>> +    int irq, ret;
>> +
>> +    irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id,
>> iommu->node, iommu);
>> +    if (irq <= 0)
>> +        return -EINVAL;
>> +
>> +    snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name),
>> "dmar%d-perf", iommu->seq_id);
>> +
>> +    iommu->perf_irq = irq;
>> +    ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler,
>> +                   IRQF_ONESHOT, iommu_pmu->irq_name, iommu);
>> +    if (ret) {
>> +        dmar_free_hwirq(irq);
>> +        iommu->perf_irq = 0;
>> +        return ret;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
>> +{
>> +    if (!iommu->perf_irq)
>> +        return;
>> +
>> +    free_irq(iommu->perf_irq, iommu);
>> +    dmar_free_hwirq(iommu->perf_irq);
>> +    iommu->perf_irq = 0;
>> +}
>> +
>>   static int iommu_pmu_cpu_online(unsigned int cpu)
>>   {
>>       if (cpumask_empty(&iommu_pmu_cpu_mask))
>> @@ -734,8 +809,14 @@ void iommu_pmu_register(struct intel_iommu *iommu)
>>       if (iommu_pmu_cpuhp_setup(iommu_pmu))
>>           goto unregister;
>>   +    /* Set interrupt for overflow */
>> +    if (iommu_pmu_set_interrupt(iommu))
>> +        goto cpuhp_free;
>> +
>>       return;
>>   +cpuhp_free:
>> +    iommu_pmu_cpuhp_free(iommu_pmu);
>>   unregister:
>>       perf_pmu_unregister(&iommu_pmu->pmu);
>>   err:
>> @@ -749,6 +830,7 @@ void iommu_pmu_unregister(struct intel_iommu *iommu)
>>       if (!iommu_pmu)
>>           return;
>>   +    iommu_pmu_unset_interrupt(iommu);
>>       iommu_pmu_cpuhp_free(iommu_pmu);
>>       perf_pmu_unregister(&iommu_pmu->pmu);
>>   }
>> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
>> index c76b66263467..b6c5edd80d5d 100644
>> --- a/drivers/iommu/intel/svm.c
>> +++ b/drivers/iommu/intel/svm.c
>> @@ -79,7 +79,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
>>       }
>>       iommu->prq = page_address(pages);
>>   -    irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id,
>> iommu->node, iommu);
>> +    irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id,
>> iommu->node, iommu);
>>       if (irq <= 0) {
>>           pr_err("IOMMU: %s: Failed to create IRQ vector for page
>> request queue\n",
>>                  iommu->name);
>
> --
> Best regards,
> baolu