Re: [PATCH v3 1/2] x86/mce/AMD: Redo logging of errors from APIC LVT interrupts

From: Borislav Petkov
Date: Wed Apr 26 2017 - 09:55:49 EST


On Tue, Apr 25, 2017 at 02:16:11PM -0500, Yazen Ghannam wrote:
> From: Yazen Ghannam <yazen.ghannam@xxxxxxx>
>
> We have support for the new SMCA MCA_DE{STAT,ADDR} registers in Linux. So
> we've used these registers in place of MCA_{STATUS,ADDR} on SMCA systems.
> However, the guidance for current implementations of SMCA is to continue
> using MCA_{STATUS,ADDR} and to use MCA_DE{STAT,ADDR} only if a Deferred
> error was not found in the former registers. If we logged a Deferred error
> in MCA_STATUS then we should also clear MCA_DESTAT. This also means we
> shouldn't clear MCA_CONFIG[LogDeferredInMcaStat].
>
> Don't clear MCA_CONFIG[LogDeferredInMcaStat] during AMD mcheck init.
>
> Don't break after finding the first error in either the Deferred or
> Thresholding interrupt handlers.
>
> Rework __log_error() to only log error. Most valid checks are moved into
> __log_error_* helper functions.
>
> Write the Deferred error __log_error_* helper function to follow the
> guidance for current SMCA systems.
>
> Signed-off-by: Yazen Ghannam <yazen.ghannam@xxxxxxx>
> ---

...

> +static void
> +__log_error_deferred(unsigned int bank)
> +{
> + u64 status, addr;
> + bool logged_deferred = false;
> +
> + rdmsrl(msr_ops.status(bank), status);
> +
> + /* Log any valid error we find. */
> + if (status & MCI_STATUS_VAL) {
> + rdmsrl(msr_ops.addr(bank), addr);

Check ADDRV here and in all cases below before reading MCi_ADDR.

> +
> + __log_error(bank, status, addr, 0);
> +
> + wrmsrl(msr_ops.status(bank), 0);
> +
> + logged_deferred = !!(status & MCI_STATUS_DEFERRED);
> + }
> +
> + if (!mce_flags.smca)
> + return;
> +
> + /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
> + if (logged_deferred) {

if (status & MCI_STATUS_DEFERRED)

is fine and you can drop the bool.

Oh well, here's an updated version with those suggestions incorporated.

Also, I've carved out the common functionality into a _log_error_bank()
which is more compact. Now it is even easier to follow what
log_error_deferred() does.

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 41439ab41102..3688a7fbb0bb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -472,20 +472,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
smca_high |= BIT(0);

/*
- * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
- * registers with the option of additionally logging to
- * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
- *
- * This bit is usually set by BIOS to retain the old behavior
- * for OSes that don't use the new registers. Linux supports the
- * new registers so let's disable that additional logging here.
- *
- * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
- * portion of the MSR).
- */
- smca_high &= ~BIT(2);
-
- /*
* SMCA sets the Deferred Error Interrupt type per bank.
*
* MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
@@ -756,36 +742,19 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);

static void
-__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
+__log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
- u32 msr_status = msr_ops.status(bank);
- u32 msr_addr = msr_ops.addr(bank);
struct mce m;
- u64 status;
-
- WARN_ON_ONCE(deferred_err && threshold_err);
-
- if (deferred_err && mce_flags.smca) {
- msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
- msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
- }
-
- rdmsrl(msr_status, status);
-
- if (!(status & MCI_STATUS_VAL))
- return;

mce_setup(&m);

m.status = status;
+ m.misc = misc;
m.bank = bank;
m.tsc = rdtsc();

- if (threshold_err)
- m.misc = misc;
-
if (m.status & MCI_STATUS_ADDRV) {
- rdmsrl(msr_addr, m.addr);
+ m.addr = addr;

/*
* Extract [55:<lsb>] where lsb is the least significant
@@ -806,8 +775,6 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
}

mce_log(&m);
-
- wrmsrl(msr_status, 0);
}

static inline void __smp_deferred_error_interrupt(void)
@@ -832,45 +799,86 @@ asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
exiting_ack_irq();
}

-/* APIC interrupt handler for deferred errors */
-static void amd_deferred_error_interrupt(void)
+/*
+ * Returns true if the logged error is deferred. False, otherwise.
+ */
+static inline bool
+_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
{
- unsigned int bank;
- u32 msr_status;
- u64 status;
+ u64 status, addr = 0;

- for (bank = 0; bank < mca_cfg.banks; ++bank) {
- msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
- : msr_ops.status(bank);
+ rdmsrl(msr_stat, status);

- rdmsrl(msr_status, status);
+ if (!(status & MCI_STATUS_VAL))
+ return false;

- if (!(status & MCI_STATUS_VAL) ||
- !(status & MCI_STATUS_DEFERRED))
- continue;
+ if (status & MCI_STATUS_ADDRV)
+ rdmsrl(msr_addr, addr);

- __log_error(bank, true, false, 0);
- break;
- }
+ __log_error(bank, status, addr, misc);
+
+ wrmsrl(status, 0);
+
+ return status & MCI_STATUS_DEFERRED;
}

/*
- * APIC Interrupt Handler
+ * We have three scenarios for checking for Deferred errors:
+ *
+ * 1) Non-SMCA systems check MCA_STATUS and log error if found.
+ * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
+ * clear MCA_DESTAT.
+ * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
+ * log it.
*/
+static void log_error_deferred(unsigned int bank)
+{
+ bool defrd;
+
+ defrd = _log_error_bank(bank, msr_ops.status(bank),
+ msr_ops.addr(bank), 0);
+
+ if (!mce_flags.smca)
+ return;
+
+ /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
+ if (defrd) {
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
+ return;
+ }
+
+ /*
+ * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
+ * for a valid error.
+ */
+ _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
+ MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
+}
+
+/* APIC interrupt handler for deferred errors */
+static void amd_deferred_error_interrupt(void)
+{
+ unsigned int bank;
+
+ for (bank = 0; bank < mca_cfg.banks; ++bank)
+ log_error_deferred(bank);
+}
+
+static void log_error_thresholding(unsigned int bank, u64 misc)
+{
+ _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
+}

/*
- * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
- * the interrupt goes off when error_count reaches threshold_limit.
- * the handler will simply log mcelog w/ software defined bank number.
+ * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
+ * goes off when error_count reaches threshold_limit.
*/
-
static void amd_threshold_interrupt(void)
{
u32 low = 0, high = 0, address = 0;
unsigned int bank, block, cpu = smp_processor_id();
struct thresh_restart tr;

- /* assume first bank caused it */
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
@@ -893,23 +901,18 @@ static void amd_threshold_interrupt(void)
(high & MASK_LOCKED_HI))
continue;

- /*
- * Log the machine check that caused the threshold
- * event.
- */
- if (high & MASK_OVERFLOW_HI)
- goto log;
- }
- }
- return;
+ if (!(high & MASK_OVERFLOW_HI))
+ continue;

-log:
- __log_error(bank, false, true, ((u64)high << 32) | low);
+ /* Log the MCE which caused the threshold event. */
+ log_error_thresholding(bank, ((u64)high << 32) | low);

- /* Reset threshold block after logging error. */
- memset(&tr, 0, sizeof(tr));
- tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block];
- threshold_restart_bank(&tr);
+ /* Reset threshold block after logging error. */
+ memset(&tr, 0, sizeof(tr));
+ tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block];
+ threshold_restart_bank(&tr);
+ }
+ }
}

/*
--
2.11.0

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.